aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.clang-format1
-rw-r--r--.mailmap1
-rw-r--r--Documentation/ABI/stable/sysfs-class-infiniband17
-rw-r--r--Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x725
-rw-r--r--Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci38
-rw-r--r--Documentation/ABI/testing/sysfs-class-power8
-rw-r--r--Documentation/ABI/testing/sysfs-fs-f2fs3
-rw-r--r--Documentation/RCU/Design/Data-Structures/Data-Structures.rst2
-rw-r--r--Documentation/RCU/Design/Requirements/Requirements.rst4
-rw-r--r--Documentation/RCU/whatisRCU.rst2
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt162
-rw-r--r--Documentation/admin-guide/pm/cpufreq.rst11
-rw-r--r--Documentation/admin-guide/pstore-blk.rst10
-rw-r--r--Documentation/admin-guide/xfs.rst32
-rw-r--r--Documentation/block/blk-mq.rst12
-rw-r--r--Documentation/block/inline-encryption.rst8
-rw-r--r--Documentation/conf.py65
-rw-r--r--Documentation/core-api/genericirq.rst2
-rw-r--r--Documentation/core-api/kernel-api.rst6
-rw-r--r--Documentation/core-api/workqueue.rst2
-rw-r--r--Documentation/core-api/xarray.rst16
-rw-r--r--Documentation/dev-tools/kgdb.rst6
-rw-r--r--Documentation/dev-tools/kunit/index.rst1
-rw-r--r--Documentation/dev-tools/kunit/style.rst205
-rw-r--r--Documentation/dev-tools/kunit/usage.rst5
-rw-r--r--Documentation/devicetree/bindings/display/tilcdc/tilcdc.txt2
-rw-r--r--Documentation/devicetree/bindings/mailbox/arm,mhu.yaml135
-rw-r--r--Documentation/devicetree/bindings/mailbox/arm-mhu.txt43
-rw-r--r--Documentation/devicetree/bindings/mailbox/omap-mailbox.txt2
-rw-r--r--Documentation/devicetree/bindings/media/i2c/tvp5150.txt2
-rw-r--r--Documentation/devicetree/bindings/mips/ingenic/devices.yaml5
-rw-r--r--Documentation/devicetree/bindings/mtd/nand-controller.yaml31
-rw-r--r--Documentation/devicetree/bindings/power/reset/ocelot-reset.txt7
-rw-r--r--Documentation/devicetree/bindings/power/reset/reboot-mode.txt25
-rw-r--r--Documentation/devicetree/bindings/power/reset/reboot-mode.yaml47
-rw-r--r--Documentation/devicetree/bindings/power/supply/battery.yaml24
-rw-r--r--Documentation/devicetree/bindings/power/supply/bq25890.txt4
-rw-r--r--Documentation/devicetree/bindings/power/supply/bq25980.yaml114
-rw-r--r--Documentation/devicetree/bindings/power/supply/bq27xxx.yaml1
-rw-r--r--Documentation/devicetree/bindings/power/supply/charger-manager.txt30
-rw-r--r--Documentation/devicetree/bindings/power/supply/gpio-charger.yaml31
-rw-r--r--Documentation/devicetree/bindings/power/supply/ingenic,battery.txt31
-rw-r--r--Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml61
-rw-r--r--Documentation/devicetree/bindings/power/supply/max17040_battery.txt21
-rw-r--r--Documentation/devicetree/bindings/power/supply/summit,smb347-charger.yaml152
-rw-r--r--Documentation/devicetree/bindings/pwm/google,cros-ec-pwm.yaml2
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml2
-rw-r--r--Documentation/devicetree/bindings/thermal/allwinner,sun8i-a83t-ths.yaml6
-rw-r--r--Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml1
-rw-r--r--Documentation/doc-guide/kernel-doc.rst8
-rw-r--r--Documentation/driver-api/basics.rst19
-rw-r--r--Documentation/driver-api/device_link.rst13
-rw-r--r--Documentation/driver-api/fpga/fpga-bridge.rst4
-rw-r--r--Documentation/driver-api/fpga/fpga-mgr.rst6
-rw-r--r--Documentation/driver-api/fpga/fpga-programming.rst2
-rw-r--r--Documentation/driver-api/fpga/fpga-region.rst6
-rw-r--r--Documentation/driver-api/iio/buffers.rst2
-rw-r--r--Documentation/driver-api/iio/core.rst6
-rw-r--r--Documentation/driver-api/iio/hw-consumer.rst2
-rw-r--r--Documentation/driver-api/iio/triggered-buffers.rst2
-rw-r--r--Documentation/driver-api/iio/triggers.rst4
-rw-r--r--Documentation/driver-api/index.rst1
-rw-r--r--Documentation/driver-api/infrastructure.rst4
-rw-r--r--Documentation/driver-api/libata.rst2
-rw-r--r--Documentation/driver-api/media/cec-core.rst2
-rw-r--r--Documentation/driver-api/media/dtv-frontend.rst4
-rw-r--r--Documentation/driver-api/media/mc-core.rst24
-rw-r--r--Documentation/driver-api/media/v4l2-controls.rst2
-rw-r--r--Documentation/driver-api/media/v4l2-dev.rst8
-rw-r--r--Documentation/driver-api/media/v4l2-device.rst6
-rw-r--r--Documentation/driver-api/media/v4l2-event.rst10
-rw-r--r--Documentation/driver-api/media/v4l2-fh.rst16
-rw-r--r--Documentation/driver-api/media/v4l2-subdev.rst2
-rw-r--r--Documentation/driver-api/mei/mei.rst2
-rw-r--r--Documentation/driver-api/pm/cpuidle.rst65
-rw-r--r--Documentation/driver-api/pm/devices.rst26
-rw-r--r--Documentation/driver-api/regulator.rst4
-rw-r--r--Documentation/driver-api/sound.rst54
-rw-r--r--Documentation/driver-api/target.rst12
-rw-r--r--Documentation/driver-api/usb/URB.rst2
-rw-r--r--Documentation/driver-api/usb/gadget.rst10
-rw-r--r--Documentation/driver-api/usb/hotplug.rst2
-rw-r--r--Documentation/driver-api/usb/typec_bus.rst8
-rw-r--r--Documentation/fault-injection/fault-injection.rst7
-rw-r--r--Documentation/features/vm/ioremap_prot/arch-support.txt2
-rw-r--r--Documentation/filesystems/f2fs.rst82
-rw-r--r--Documentation/filesystems/fscrypt.rst74
-rw-r--r--Documentation/filesystems/fsverity.rst2
-rw-r--r--Documentation/filesystems/fuse.rst2
-rw-r--r--Documentation/filesystems/overlayfs.rst19
-rw-r--r--Documentation/filesystems/zonefs.rst15
-rw-r--r--Documentation/gpu/amdgpu.rst4
-rw-r--r--Documentation/gpu/i915.rst29
-rw-r--r--Documentation/networking/ieee802154.rst18
-rw-r--r--Documentation/powerpc/isa-versions.rst4
-rw-r--r--Documentation/powerpc/ptrace.rst1
-rw-r--r--Documentation/powerpc/syscall64-abi.rst32
-rw-r--r--Documentation/sound/designs/tracepoints.rst22
-rw-r--r--Documentation/sound/kernel-api/alsa-driver-api.rst1
-rw-r--r--Documentation/sound/kernel-api/writing-an-alsa-driver.rst110
-rw-r--r--Documentation/sphinx/automarkup.py108
-rw-r--r--Documentation/sphinx/cdomain.py93
-rw-r--r--Documentation/sphinx/kerneldoc.py15
-rwxr-xr-xDocumentation/sphinx/parse-headers.pl2
-rw-r--r--Documentation/trace/ftrace-uses.rst12
-rw-r--r--Documentation/translations/it_IT/kernel-hacking/hacking.rst2
-rw-r--r--Documentation/translations/it_IT/kernel-hacking/locking.rst2
-rw-r--r--Documentation/translations/zh_CN/arm64/amu.rst4
-rw-r--r--Documentation/userspace-api/media/cec/cec-func-close.rst10
-rw-r--r--Documentation/userspace-api/media/cec/cec-func-ioctl.rst11
-rw-r--r--Documentation/userspace-api/media/cec/cec-func-open.rst10
-rw-r--r--Documentation/userspace-api/media/cec/cec-func-poll.rst14
-rw-r--r--Documentation/userspace-api/media/cec/cec-ioc-adap-g-caps.rst10
-rw-r--r--Documentation/userspace-api/media/cec/cec-ioc-adap-g-conn-info.rst11
-rw-r--r--Documentation/userspace-api/media/cec/cec-ioc-adap-g-log-addrs.rst20
-rw-r--r--Documentation/userspace-api/media/cec/cec-ioc-adap-g-phys-addr.rst15
-rw-r--r--Documentation/userspace-api/media/cec/cec-ioc-dqevent.rst15
-rw-r--r--Documentation/userspace-api/media/cec/cec-ioc-g-mode.rst16
-rw-r--r--Documentation/userspace-api/media/cec/cec-ioc-receive.rst18
-rw-r--r--Documentation/userspace-api/media/dvb/audio-bilingual-channel-select.rst7
-rw-r--r--Documentation/userspace-api/media/dvb/audio-channel-select.rst8
-rw-r--r--Documentation/userspace-api/media/dvb/audio-clear-buffer.rst8
-rw-r--r--Documentation/userspace-api/media/dvb/audio-continue.rst7
-rw-r--r--Documentation/userspace-api/media/dvb/audio-fclose.rst7
-rw-r--r--Documentation/userspace-api/media/dvb/audio-fopen.rst7
-rw-r--r--Documentation/userspace-api/media/dvb/audio-fwrite.rst7
-rw-r--r--Documentation/userspace-api/media/dvb/audio-get-capabilities.rst8
-rw-r--r--Documentation/userspace-api/media/dvb/audio-get-status.rst8
-rw-r--r--Documentation/userspace-api/media/dvb/audio-pause.rst9
-rw-r--r--Documentation/userspace-api/media/dvb/audio-play.rst7
-rw-r--r--Documentation/userspace-api/media/dvb/audio-select-source.rst8
-rw-r--r--Documentation/userspace-api/media/dvb/audio-set-av-sync.rst8
-rw-r--r--Documentation/userspace-api/media/dvb/audio-set-bypass-mode.rst9
-rw-r--r--Documentation/userspace-api/media/dvb/audio-set-id.rst9
-rw-r--r--Documentation/userspace-api/media/dvb/audio-set-mixer.rst9
-rw-r--r--Documentation/userspace-api/media/dvb/audio-set-mute.rst8
-rw-r--r--Documentation/userspace-api/media/dvb/audio-set-streamtype.rst10
-rw-r--r--Documentation/userspace-api/media/dvb/audio-stop.rst9
-rw-r--r--Documentation/userspace-api/media/dvb/ca-fclose.rst7
-rw-r--r--Documentation/userspace-api/media/dvb/ca-fopen.rst7
-rw-r--r--Documentation/userspace-api/media/dvb/ca-get-cap.rst8
-rw-r--r--Documentation/userspace-api/media/dvb/ca-get-descr-info.rst9
-rw-r--r--Documentation/userspace-api/media/dvb/ca-get-msg.rst10
-rw-r--r--Documentation/userspace-api/media/dvb/ca-get-slot-info.rst9
-rw-r--r--Documentation/userspace-api/media/dvb/ca-reset.rst9
-rw-r--r--Documentation/userspace-api/media/dvb/ca-send-msg.rst9
-rw-r--r--Documentation/userspace-api/media/dvb/ca-set-descr.rst8
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-add-pid.rst10
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-expbuf.rst11
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-fclose.rst9
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-fopen.rst5
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-fread.rst7
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-fwrite.rst6
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-get-pes-pids.rst12
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-get-stc.rst12
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-mmap.rst15
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-munmap.rst14
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-qbuf.rst17
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-querybuf.rst9
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-remove-pid.rst10
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-reqbufs.rst9
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-set-buffer-size.rst10
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-set-filter.rst12
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-set-pes-filter.rst13
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-start.rst11
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-stop.rst9
-rw-r--r--Documentation/userspace-api/media/dvb/fe-diseqc-recv-slave-reply.rst9
-rw-r--r--Documentation/userspace-api/media/dvb/fe-diseqc-reset-overload.rst9
-rw-r--r--Documentation/userspace-api/media/dvb/fe-diseqc-send-burst.rst10
-rw-r--r--Documentation/userspace-api/media/dvb/fe-diseqc-send-master-cmd.rst9
-rw-r--r--Documentation/userspace-api/media/dvb/fe-dishnetwork-send-legacy-cmd.rst10
-rw-r--r--Documentation/userspace-api/media/dvb/fe-enable-high-lnb-voltage.rst10
-rw-r--r--Documentation/userspace-api/media/dvb/fe-get-event.rst12
-rw-r--r--Documentation/userspace-api/media/dvb/fe-get-frontend.rst12
-rw-r--r--Documentation/userspace-api/media/dvb/fe-get-info.rst11
-rw-r--r--Documentation/userspace-api/media/dvb/fe-get-property.rst15
-rw-r--r--Documentation/userspace-api/media/dvb/fe-read-ber.rst9
-rw-r--r--Documentation/userspace-api/media/dvb/fe-read-signal-strength.rst9
-rw-r--r--Documentation/userspace-api/media/dvb/fe-read-snr.rst9
-rw-r--r--Documentation/userspace-api/media/dvb/fe-read-status.rst11
-rw-r--r--Documentation/userspace-api/media/dvb/fe-read-uncorrected-blocks.rst9
-rw-r--r--Documentation/userspace-api/media/dvb/fe-set-frontend-tune-mode.rst10
-rw-r--r--Documentation/userspace-api/media/dvb/fe-set-frontend.rst11
-rw-r--r--Documentation/userspace-api/media/dvb/fe-set-tone.rst10
-rw-r--r--Documentation/userspace-api/media/dvb/fe-set-voltage.rst10
-rw-r--r--Documentation/userspace-api/media/dvb/frontend_f_close.rst8
-rw-r--r--Documentation/userspace-api/media/dvb/frontend_f_open.rst10
-rw-r--r--Documentation/userspace-api/media/dvb/net-add-if.rst9
-rw-r--r--Documentation/userspace-api/media/dvb/net-get-if.rst10
-rw-r--r--Documentation/userspace-api/media/dvb/net-remove-if.rst10
-rw-r--r--Documentation/userspace-api/media/dvb/video-clear-buffer.rst8
-rw-r--r--Documentation/userspace-api/media/dvb/video-command.rst10
-rw-r--r--Documentation/userspace-api/media/dvb/video-continue.rst8
-rw-r--r--Documentation/userspace-api/media/dvb/video-fast-forward.rst10
-rw-r--r--Documentation/userspace-api/media/dvb/video-fclose.rst6
-rw-r--r--Documentation/userspace-api/media/dvb/video-fopen.rst6
-rw-r--r--Documentation/userspace-api/media/dvb/video-freeze.rst8
-rw-r--r--Documentation/userspace-api/media/dvb/video-fwrite.rst6
-rw-r--r--Documentation/userspace-api/media/dvb/video-get-capabilities.rst8
-rw-r--r--Documentation/userspace-api/media/dvb/video-get-event.rst8
-rw-r--r--Documentation/userspace-api/media/dvb/video-get-frame-count.rst8
-rw-r--r--Documentation/userspace-api/media/dvb/video-get-pts.rst8
-rw-r--r--Documentation/userspace-api/media/dvb/video-get-size.rst8
-rw-r--r--Documentation/userspace-api/media/dvb/video-get-status.rst7
-rw-r--r--Documentation/userspace-api/media/dvb/video-play.rst8
-rw-r--r--Documentation/userspace-api/media/dvb/video-select-source.rst7
-rw-r--r--Documentation/userspace-api/media/dvb/video-set-blank.rst8
-rw-r--r--Documentation/userspace-api/media/dvb/video-set-display-format.rst8
-rw-r--r--Documentation/userspace-api/media/dvb/video-set-format.rst9
-rw-r--r--Documentation/userspace-api/media/dvb/video-set-streamtype.rst8
-rw-r--r--Documentation/userspace-api/media/dvb/video-slowmotion.rst10
-rw-r--r--Documentation/userspace-api/media/dvb/video-stillpicture.rst8
-rw-r--r--Documentation/userspace-api/media/dvb/video-stop.rst8
-rw-r--r--Documentation/userspace-api/media/dvb/video-try-command.rst8
-rw-r--r--Documentation/userspace-api/media/mediactl/media-func-close.rst10
-rw-r--r--Documentation/userspace-api/media/mediactl/media-func-ioctl.rst10
-rw-r--r--Documentation/userspace-api/media/mediactl/media-func-open.rst10
-rw-r--r--Documentation/userspace-api/media/mediactl/media-ioc-device-info.rst13
-rw-r--r--Documentation/userspace-api/media/mediactl/media-ioc-enum-entities.rst11
-rw-r--r--Documentation/userspace-api/media/mediactl/media-ioc-enum-links.rst13
-rw-r--r--Documentation/userspace-api/media/mediactl/media-ioc-g-topology.rst14
-rw-r--r--Documentation/userspace-api/media/mediactl/media-ioc-request-alloc.rst11
-rw-r--r--Documentation/userspace-api/media/mediactl/media-ioc-setup-link.rst10
-rw-r--r--Documentation/userspace-api/media/mediactl/media-request-ioc-queue.rst7
-rw-r--r--Documentation/userspace-api/media/mediactl/media-request-ioc-reinit.rst8
-rw-r--r--Documentation/userspace-api/media/mediactl/request-api.rst5
-rw-r--r--Documentation/userspace-api/media/mediactl/request-func-close.rst8
-rw-r--r--Documentation/userspace-api/media/mediactl/request-func-ioctl.rst8
-rw-r--r--Documentation/userspace-api/media/mediactl/request-func-poll.rst12
-rw-r--r--Documentation/userspace-api/media/rc/lirc-get-features.rst9
-rw-r--r--Documentation/userspace-api/media/rc/lirc-get-rec-mode.rst12
-rw-r--r--Documentation/userspace-api/media/rc/lirc-get-rec-resolution.rst8
-rw-r--r--Documentation/userspace-api/media/rc/lirc-get-send-mode.rst14
-rw-r--r--Documentation/userspace-api/media/rc/lirc-get-timeout.rst13
-rw-r--r--Documentation/userspace-api/media/rc/lirc-read.rst10
-rw-r--r--Documentation/userspace-api/media/rc/lirc-set-measure-carrier-mode.rst8
-rw-r--r--Documentation/userspace-api/media/rc/lirc-set-rec-carrier-range.rst6
-rw-r--r--Documentation/userspace-api/media/rc/lirc-set-rec-carrier.rst8
-rw-r--r--Documentation/userspace-api/media/rc/lirc-set-rec-timeout-reports.rst8
-rw-r--r--Documentation/userspace-api/media/rc/lirc-set-rec-timeout.rst13
-rw-r--r--Documentation/userspace-api/media/rc/lirc-set-send-carrier.rst8
-rw-r--r--Documentation/userspace-api/media/rc/lirc-set-send-duty-cycle.rst8
-rw-r--r--Documentation/userspace-api/media/rc/lirc-set-transmitter-mask.rst8
-rw-r--r--Documentation/userspace-api/media/rc/lirc-set-wideband-receiver.rst8
-rw-r--r--Documentation/userspace-api/media/rc/lirc-write.rst7
-rw-r--r--Documentation/userspace-api/media/v4l/buffer.rst28
-rw-r--r--Documentation/userspace-api/media/v4l/dev-capture.rst7
-rw-r--r--Documentation/userspace-api/media/v4l/dev-output.rst7
-rw-r--r--Documentation/userspace-api/media/v4l/dev-raw-vbi.rst19
-rw-r--r--Documentation/userspace-api/media/v4l/dev-rds.rst12
-rw-r--r--Documentation/userspace-api/media/v4l/dev-sliced-vbi.rst31
-rw-r--r--Documentation/userspace-api/media/v4l/diff-v4l.rst39
-rw-r--r--Documentation/userspace-api/media/v4l/dmabuf.rst8
-rw-r--r--Documentation/userspace-api/media/v4l/format.rst7
-rw-r--r--Documentation/userspace-api/media/v4l/func-close.rst8
-rw-r--r--Documentation/userspace-api/media/v4l/func-ioctl.rst10
-rw-r--r--Documentation/userspace-api/media/v4l/func-mmap.rst18
-rw-r--r--Documentation/userspace-api/media/v4l/func-munmap.rst14
-rw-r--r--Documentation/userspace-api/media/v4l/func-open.rst14
-rw-r--r--Documentation/userspace-api/media/v4l/func-poll.rst40
-rw-r--r--Documentation/userspace-api/media/v4l/func-read.rst39
-rw-r--r--Documentation/userspace-api/media/v4l/func-select.rst42
-rw-r--r--Documentation/userspace-api/media/v4l/func-write.rst13
-rw-r--r--Documentation/userspace-api/media/v4l/hist-v4l2.rst70
-rw-r--r--Documentation/userspace-api/media/v4l/io.rst6
-rw-r--r--Documentation/userspace-api/media/v4l/libv4l-introduction.rst30
-rw-r--r--Documentation/userspace-api/media/v4l/mmap.rst26
-rw-r--r--Documentation/userspace-api/media/v4l/open.rst15
-rw-r--r--Documentation/userspace-api/media/v4l/rw.rst18
-rw-r--r--Documentation/userspace-api/media/v4l/streaming-par.rst5
-rw-r--r--Documentation/userspace-api/media/v4l/userp.rst11
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-create-bufs.rst11
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-cropcap.rst11
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-dbg-g-chip-info.rst13
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-dbg-g-register.rst18
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-decoder-cmd.rst19
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-dqevent.rst19
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-dv-timings-cap.rst17
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-encoder-cmd.rst25
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-enum-dv-timings.rst16
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-enum-fmt.rst12
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-enum-frameintervals.rst15
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-enum-framesizes.rst16
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-enum-freq-bands.rst12
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-enumaudio.rst10
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-enumaudioout.rst10
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-enuminput.rst14
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-enumoutput.rst13
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-enumstd.rst23
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-expbuf.rst14
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-audio.rst18
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-audioout.rst16
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-crop.rst16
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-ctrl.rst16
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-dv-timings.rst26
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-edid.rst25
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-enc-index.rst13
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-ext-ctrls.rst21
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-fbuf.rst18
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-fmt.rst22
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-frequency.rst16
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-input.rst15
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-jpegcomp.rst17
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-modulator.rst17
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-output.rst15
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-parm.rst30
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-priority.rst16
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-selection.rst15
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-sliced-vbi-cap.rst12
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-std.rst24
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-tuner.rst19
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-log-status.rst10
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-overlay.rst10
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-prepare-buf.rst10
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-qbuf.rst17
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-query-dv-timings.rst15
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-querybuf.rst10
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-querycap.rst16
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst22
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-querystd.rst15
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-reqbufs.rst10
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-s-hw-freq-seek.rst11
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-streamon.rst14
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-subdev-enum-frame-interval.rst10
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-subdev-enum-frame-size.rst11
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-subdev-enum-mbus-code.rst10
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-subdev-g-crop.rst16
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-subdev-g-fmt.rst17
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-subdev-g-frame-interval.rst16
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-subdev-g-selection.rst17
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-subdev-querycap.rst9
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-subscribe-event.rst17
-rw-r--r--Documentation/virt/uml/user_mode_linux_howto_v2.rst1
-rw-r--r--Documentation/vm/hmm.rst2
-rw-r--r--Documentation/vm/ksm.rst2
-rw-r--r--Documentation/vm/memory-model.rst6
-rw-r--r--MAINTAINERS60
-rw-r--r--arch/Kconfig7
-rw-r--r--arch/alpha/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/arc/Kconfig2
-rw-r--r--arch/arc/Makefile5
-rw-r--r--arch/arc/boot/dts/axc001.dtsi2
-rw-r--r--arch/arc/boot/dts/axc003.dtsi2
-rw-r--r--arch/arc/boot/dts/axc003_idu.dtsi2
-rw-r--r--arch/arc/boot/dts/eznps.dts84
-rw-r--r--arch/arc/boot/dts/vdk_axc003.dtsi2
-rw-r--r--arch/arc/boot/dts/vdk_axc003_idu.dtsi2
-rw-r--r--arch/arc/configs/nps_defconfig80
-rw-r--r--arch/arc/include/asm/atomic.h108
-rw-r--r--arch/arc/include/asm/barrier.h9
-rw-r--r--arch/arc/include/asm/bitops.h58
-rw-r--r--arch/arc/include/asm/cmpxchg.h72
-rw-r--r--arch/arc/include/asm/entry-compact.h27
-rw-r--r--arch/arc/include/asm/processor.h37
-rw-r--r--arch/arc/include/asm/ptrace.h5
-rw-r--r--arch/arc/include/asm/setup.h4
-rw-r--r--arch/arc/include/asm/spinlock.h6
-rw-r--r--arch/arc/include/asm/switch_to.h9
-rw-r--r--arch/arc/kernel/ctx_sw.c13
-rw-r--r--arch/arc/kernel/devtree.c2
-rw-r--r--arch/arc/kernel/process.c15
-rw-r--r--arch/arc/kernel/smp.c2
-rw-r--r--arch/arc/mm/tlbex.S7
-rw-r--r--arch/arc/plat-eznps/Kconfig58
-rw-r--r--arch/arc/plat-eznps/Makefile8
-rw-r--r--arch/arc/plat-eznps/ctop.c21
-rw-r--r--arch/arc/plat-eznps/entry.S60
-rw-r--r--arch/arc/plat-eznps/include/plat/ctop.h208
-rw-r--r--arch/arc/plat-eznps/include/plat/mtm.h49
-rw-r--r--arch/arc/plat-eznps/include/plat/smp.h15
-rw-r--r--arch/arc/plat-eznps/mtm.c166
-rw-r--r--arch/arc/plat-eznps/platform.c91
-rw-r--r--arch/arc/plat-eznps/smp.c138
-rw-r--r--arch/arc/plat-hsdk/Kconfig1
-rw-r--r--arch/arm/Kconfig.debug16
-rw-r--r--arch/arm/Makefile3
-rw-r--r--arch/arm/boot/compressed/Makefile7
-rw-r--r--arch/arm/boot/compressed/debug.S5
-rw-r--r--arch/arm/boot/compressed/head.S74
-rw-r--r--arch/arm/boot/compressed/vmlinux.lds.S4
-rw-r--r--arch/arm/include/debug/8250.S7
-rw-r--r--arch/arm/include/debug/asm9260.S5
-rw-r--r--arch/arm/include/debug/at91.S5
-rw-r--r--arch/arm/include/debug/bcm63xx.S5
-rw-r--r--arch/arm/include/debug/brcmstb.S5
-rw-r--r--arch/arm/include/debug/clps711x.S5
-rw-r--r--arch/arm/include/debug/dc21285.S5
-rw-r--r--arch/arm/include/debug/digicolor.S5
-rw-r--r--arch/arm/include/debug/efm32.S5
-rw-r--r--arch/arm/include/debug/icedcc.S15
-rw-r--r--arch/arm/include/debug/imx.S5
-rw-r--r--arch/arm/include/debug/meson.S5
-rw-r--r--arch/arm/include/debug/msm.S5
-rw-r--r--arch/arm/include/debug/omap2plus.S5
-rw-r--r--arch/arm/include/debug/pl01x.S5
-rw-r--r--arch/arm/include/debug/renesas-scif.S5
-rw-r--r--arch/arm/include/debug/sa1100.S5
-rw-r--r--arch/arm/include/debug/samsung.S5
-rw-r--r--arch/arm/include/debug/sirf.S5
-rw-r--r--arch/arm/include/debug/sti.S5
-rw-r--r--arch/arm/include/debug/stm32.S5
-rw-r--r--arch/arm/include/debug/tegra.S7
-rw-r--r--arch/arm/include/debug/vf.S5
-rw-r--r--arch/arm/include/debug/vt8500.S5
-rw-r--r--arch/arm/include/debug/zynq.S5
-rw-r--r--arch/arm/kernel/debug.S11
-rw-r--r--arch/arm/kernel/hw_breakpoint.c100
-rw-r--r--arch/arm/mach-davinci/board-da830-evm.c2
-rw-r--r--arch/arm/mach-davinci/board-da850-evm.c2
-rw-r--r--arch/arm/mach-davinci/board-dm355-evm.c2
-rw-r--r--arch/arm/mach-davinci/board-dm355-leopard.c3
-rw-r--r--arch/arm/mach-davinci/board-dm365-evm.c2
-rw-r--r--arch/arm/mach-davinci/board-dm644x-evm.c2
-rw-r--r--arch/arm/mach-davinci/board-dm646x-evm.c2
-rw-r--r--arch/arm/mach-davinci/board-mityomapl138.c2
-rw-r--r--arch/arm/mach-davinci/board-neuros-osd2.c2
-rw-r--r--arch/arm/mach-davinci/board-omapl138-hawk.c2
-rw-r--r--arch/arm/mach-pxa/tosa.c12
-rw-r--r--arch/arm/mach-s3c24xx/common-smdk.c2
-rw-r--r--arch/arm/mach-s3c24xx/mach-anubis.c2
-rw-r--r--arch/arm/mach-s3c24xx/mach-at2440evb.c2
-rw-r--r--arch/arm/mach-s3c24xx/mach-bast.c2
-rw-r--r--arch/arm/mach-s3c24xx/mach-gta02.c2
-rw-r--r--arch/arm/mach-s3c24xx/mach-jive.c2
-rw-r--r--arch/arm/mach-s3c24xx/mach-mini2440.c2
-rw-r--r--arch/arm/mach-s3c24xx/mach-osiris.c2
-rw-r--r--arch/arm/mach-s3c24xx/mach-qt2410.c2
-rw-r--r--arch/arm/mach-s3c24xx/mach-rx1950.c2
-rw-r--r--arch/arm/mach-s3c24xx/mach-rx3715.c2
-rw-r--r--arch/arm/mach-s3c24xx/mach-vstms.c2
-rw-r--r--arch/arm/mach-s3c64xx/mach-hmt.c2
-rw-r--r--arch/arm/mach-s3c64xx/mach-mini6410.c2
-rw-r--r--arch/arm/mach-s3c64xx/mach-real6410.c2
-rw-r--r--arch/arm/mach-sa1100/collie.c14
-rw-r--r--arch/arm/mm/cache-l2x0.c16
-rw-r--r--arch/arm/mm/mmu.c1
-rw-r--r--arch/arm/tools/syscall.tbl1
-rw-r--r--arch/arm64/include/asm/unistd.h2
-rw-r--r--arch/arm64/include/asm/unistd32.h2
-rw-r--r--arch/ia64/kernel/Makefile2
-rw-r--r--arch/ia64/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/ia64/mm/init.c4
-rw-r--r--arch/m68k/Kconfig1
-rw-r--r--arch/m68k/coldfire/device.c6
-rw-r--r--arch/m68k/include/asm/uaccess.h398
-rw-r--r--arch/m68k/include/asm/uaccess_mm.h390
-rw-r--r--arch/m68k/include/asm/uaccess_no.h160
-rw-r--r--arch/m68k/kernel/signal.c6
-rw-r--r--arch/m68k/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/microblaze/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/mips/Kbuild.platforms2
-rw-r--r--arch/mips/Kconfig158
-rw-r--r--arch/mips/alchemy/Kconfig11
-rw-r--r--arch/mips/alchemy/board-gpr.c17
-rw-r--r--arch/mips/alchemy/board-mtx1.c17
-rw-r--r--arch/mips/alchemy/board-xxs1500.c18
-rw-r--r--arch/mips/alchemy/common/prom.c21
-rw-r--r--arch/mips/alchemy/devboards/db1300.c7
-rw-r--r--arch/mips/alchemy/devboards/platform.c17
-rw-r--r--arch/mips/ar7/memory.c2
-rw-r--r--arch/mips/ath25/ar2315.c3
-rw-r--r--arch/mips/ath25/ar5312.c3
-rw-r--r--arch/mips/bcm47xx/prom.c3
-rw-r--r--arch/mips/bcm47xx/setup.c2
-rw-r--r--arch/mips/bcm63xx/boards/board_bcm963xx.c625
-rw-r--r--arch/mips/bcm63xx/setup.c2
-rw-r--r--arch/mips/boot/compressed/Makefile8
-rw-r--r--arch/mips/boot/compressed/decompress.c4
-rw-r--r--arch/mips/boot/compressed/string.c17
-rw-r--r--arch/mips/boot/dts/ingenic/jz4725b.dtsi14
-rw-r--r--arch/mips/boot/dts/ingenic/jz4740.dtsi14
-rw-r--r--arch/mips/boot/dts/ingenic/jz4770.dtsi15
-rw-r--r--arch/mips/boot/dts/ingenic/jz4780.dtsi23
-rw-r--r--arch/mips/boot/dts/ingenic/qi_lb60.dts137
-rw-r--r--arch/mips/boot/dts/ingenic/x1000.dtsi14
-rw-r--r--arch/mips/boot/dts/ingenic/x1830.dtsi14
-rw-r--r--arch/mips/boot/dts/loongson/ls7a-pch.dtsi39
-rw-r--r--arch/mips/cavium-octeon/setup.c26
-rw-r--r--arch/mips/cobalt/setup.c3
-rw-r--r--arch/mips/configs/ci20_defconfig4
-rw-r--r--arch/mips/configs/cu1000-neo_defconfig15
-rw-r--r--arch/mips/configs/cu1830-neo_defconfig15
-rw-r--r--arch/mips/configs/gcw0_defconfig2
-rw-r--r--arch/mips/configs/loongson3_defconfig2
-rw-r--r--arch/mips/configs/pnx8335_stb225_defconfig77
-rw-r--r--arch/mips/configs/qi_lb60_defconfig7
-rw-r--r--arch/mips/configs/rs90_defconfig4
-rw-r--r--arch/mips/dec/prom/memory.c12
-rw-r--r--arch/mips/dec/setup.c9
-rw-r--r--arch/mips/fw/arc/memory.c28
-rw-r--r--arch/mips/fw/sni/sniprom.c4
-rw-r--r--arch/mips/generic/Kconfig8
-rw-r--r--arch/mips/generic/Makefile1
-rw-r--r--arch/mips/generic/Platform4
-rw-r--r--arch/mips/generic/board-ingenic.c120
-rw-r--r--arch/mips/generic/init.c11
-rw-r--r--arch/mips/generic/proc.c5
-rw-r--r--arch/mips/include/asm/bootinfo.h9
-rw-r--r--arch/mips/include/asm/cpu-features.h3
-rw-r--r--arch/mips/include/asm/cpu.h1
-rw-r--r--arch/mips/include/asm/futex.h4
-rw-r--r--arch/mips/include/asm/idle.h2
-rw-r--r--arch/mips/include/asm/llsc.h2
-rw-r--r--arch/mips/include/asm/local.h4
-rw-r--r--arch/mips/include/asm/m48t37.h36
-rw-r--r--arch/mips/include/asm/mach-au1x00/cpu-feature-overrides.h1
-rw-r--r--arch/mips/include/asm/mach-au1x00/gpio-au1300.h137
-rw-r--r--arch/mips/include/asm/mach-bcm47xx/bcm47xx.h4
-rw-r--r--arch/mips/include/asm/mach-cavium-octeon/war.h27
-rw-r--r--arch/mips/include/asm/mach-generic/irq.h2
-rw-r--r--arch/mips/include/asm/mach-generic/war.h23
-rw-r--r--arch/mips/include/asm/mach-ingenic/cpu-feature-overrides.h (renamed from arch/mips/include/asm/mach-jz4740/cpu-feature-overrides.h)0
-rw-r--r--arch/mips/include/asm/mach-ip22/war.h27
-rw-r--r--arch/mips/include/asm/mach-ip27/kmalloc.h8
-rw-r--r--arch/mips/include/asm/mach-ip27/war.h23
-rw-r--r--arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h2
-rw-r--r--arch/mips/include/asm/mach-ip28/war.h23
-rw-r--r--arch/mips/include/asm/mach-ip30/irq.h87
-rw-r--r--arch/mips/include/asm/mach-ip30/war.h24
-rw-r--r--arch/mips/include/asm/mach-ip32/war.h23
-rw-r--r--arch/mips/include/asm/mach-jz4740/irq.h13
-rw-r--r--arch/mips/include/asm/mach-loongson2ef/mc146818rtc.h36
-rw-r--r--arch/mips/include/asm/mach-loongson64/irq.h3
-rw-r--r--arch/mips/include/asm/mach-loongson64/mmzone.h6
-rw-r--r--arch/mips/include/asm/mach-malta/malta-dtshim.h25
-rw-r--r--arch/mips/include/asm/mach-malta/malta-pm.h33
-rw-r--r--arch/mips/include/asm/mach-malta/war.h23
-rw-r--r--arch/mips/include/asm/mach-paravirt/cpu-feature-overrides.h35
-rw-r--r--arch/mips/include/asm/mach-paravirt/irq.h19
-rw-r--r--arch/mips/include/asm/mach-paravirt/kernel-entry-init.h52
-rw-r--r--arch/mips/include/asm/mach-pnx833x/gpio.h159
-rw-r--r--arch/mips/include/asm/mach-pnx833x/irq-mapping.h112
-rw-r--r--arch/mips/include/asm/mach-pnx833x/irq.h40
-rw-r--r--arch/mips/include/asm/mach-pnx833x/pnx833x.h189
-rw-r--r--arch/mips/include/asm/mach-rc32434/war.h23
-rw-r--r--arch/mips/include/asm/mach-rm/war.h27
-rw-r--r--arch/mips/include/asm/mach-sibyte/war.h38
-rw-r--r--arch/mips/include/asm/mach-tx49xx/war.h23
-rw-r--r--arch/mips/include/asm/mips-boards/malta.h2
-rw-r--r--arch/mips/include/asm/mipsregs.h23
-rw-r--r--arch/mips/include/asm/netlogic/psb-bootinfo.h16
-rw-r--r--arch/mips/include/asm/octeon/cvmx-bootinfo.h4
-rw-r--r--arch/mips/include/asm/pgtable-bits.h5
-rw-r--r--arch/mips/include/asm/pgtable.h2
-rw-r--r--arch/mips/include/asm/processor.h1
-rw-r--r--arch/mips/include/asm/r4k-timer.h6
-rw-r--r--arch/mips/include/asm/sgi/heart.h51
-rw-r--r--arch/mips/include/asm/stackframe.h6
-rw-r--r--arch/mips/include/asm/switch_to.h4
-rw-r--r--arch/mips/include/asm/txx9/tx4939.h1
-rw-r--r--arch/mips/include/asm/war.h150
-rw-r--r--arch/mips/ingenic/Kconfig (renamed from arch/mips/jz4740/Kconfig)16
-rw-r--r--arch/mips/jz4740/Makefile9
-rw-r--r--arch/mips/jz4740/Platform3
-rw-r--r--arch/mips/jz4740/setup.c145
-rw-r--r--arch/mips/kernel/Makefile9
-rw-r--r--arch/mips/kernel/branch.c2
-rw-r--r--arch/mips/kernel/cpu-probe.c344
-rw-r--r--arch/mips/kernel/cpu-r3k-probe.c171
-rw-r--r--arch/mips/kernel/fpu-probe.c321
-rw-r--r--arch/mips/kernel/fpu-probe.h40
-rw-r--r--arch/mips/kernel/ftrace.c4
-rw-r--r--arch/mips/kernel/head.S2
-rw-r--r--arch/mips/kernel/mips-mt-fpaff.c4
-rw-r--r--arch/mips/kernel/process.c21
-rw-r--r--arch/mips/kernel/prom.c25
-rw-r--r--arch/mips/kernel/setup.c76
-rw-r--r--arch/mips/kernel/signal.c8
-rw-r--r--arch/mips/kernel/syscall.c2
-rw-r--r--arch/mips/kernel/syscalls/syscall_n32.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_n64.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_o32.tbl1
-rw-r--r--arch/mips/kernel/traps.c2
-rw-r--r--arch/mips/lantiq/xway/sysctrl.c10
-rw-r--r--arch/mips/loongson2ef/common/mem.c12
-rw-r--r--arch/mips/loongson32/common/prom.c4
-rw-r--r--arch/mips/loongson64/numa.c29
-rw-r--r--arch/mips/loongson64/reset.c5
-rw-r--r--arch/mips/mm/c-r4k.c17
-rw-r--r--arch/mips/mm/page.c16
-rw-r--r--arch/mips/mm/sc-mips.c2
-rw-r--r--arch/mips/mm/tlbex.c8
-rw-r--r--arch/mips/mm/uasm.c2
-rw-r--r--arch/mips/mti-malta/malta-setup.c1
-rw-r--r--arch/mips/netlogic/xlp/setup.c2
-rw-r--r--arch/mips/netlogic/xlr/setup.c5
-rw-r--r--arch/mips/pci/pci-ar2315.c5
-rw-r--r--arch/mips/pci/pci-ar71xx.c4
-rw-r--r--arch/mips/pci/pci-ar724x.c9
-rw-r--r--arch/mips/pnx833x/Makefile4
-rw-r--r--arch/mips/pnx833x/Platform4
-rw-r--r--arch/mips/pnx833x/common/Makefile2
-rw-r--r--arch/mips/pnx833x/common/interrupts.c303
-rw-r--r--arch/mips/pnx833x/common/platform.c224
-rw-r--r--arch/mips/pnx833x/common/prom.c51
-rw-r--r--arch/mips/pnx833x/common/reset.c31
-rw-r--r--arch/mips/pnx833x/common/setup.c48
-rw-r--r--arch/mips/pnx833x/stb22x/Makefile2
-rw-r--r--arch/mips/pnx833x/stb22x/board.c120
-rw-r--r--arch/mips/ralink/of.c3
-rw-r--r--arch/mips/rb532/prom.c2
-rw-r--r--arch/mips/sgi-ip30/ip30-common.h14
-rw-r--r--arch/mips/sgi-ip30/ip30-irq.c2
-rw-r--r--arch/mips/sgi-ip32/ip32-memory.c3
-rw-r--r--arch/mips/sgi-ip32/ip32-setup.c2
-rw-r--r--arch/mips/sibyte/common/cfe.c16
-rw-r--r--arch/mips/txx9/generic/setup_tx4939.c17
-rw-r--r--arch/mips/txx9/jmr3927/prom.c4
-rw-r--r--arch/mips/txx9/rbtx4927/prom.c5
-rw-r--r--arch/mips/txx9/rbtx4938/prom.c3
-rw-r--r--arch/mips/txx9/rbtx4939/prom.c14
-rw-r--r--arch/parisc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/powerpc/Kconfig21
-rw-r--r--arch/powerpc/Makefile3
-rw-r--r--arch/powerpc/Makefile.postlink2
-rw-r--r--arch/powerpc/boot/Makefile2
-rw-r--r--arch/powerpc/boot/dts/fsl/t1024rdb.dts1
-rw-r--r--arch/powerpc/boot/dts/fsl/t4240rdb.dts1
-rw-r--r--arch/powerpc/boot/util.S15
-rw-r--r--arch/powerpc/configs/85xx/mpc85xx_cds_defconfig6
-rw-r--r--arch/powerpc/configs/85xx/tqm8540_defconfig6
-rw-r--r--arch/powerpc/configs/85xx/tqm8541_defconfig6
-rw-r--r--arch/powerpc/configs/85xx/tqm8555_defconfig6
-rw-r--r--arch/powerpc/configs/85xx/tqm8560_defconfig6
-rw-r--r--arch/powerpc/include/asm/asm-prototypes.h5
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-4k.h18
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-64k.h13
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu-hash.h4
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu.h17
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h36
-rw-r--r--arch/powerpc/include/asm/book3s/64/radix.h16
-rw-r--r--arch/powerpc/include/asm/cacheflush.h10
-rw-r--r--arch/powerpc/include/asm/cputable.h18
-rw-r--r--arch/powerpc/include/asm/cputhreads.h1
-rw-r--r--arch/powerpc/include/asm/delay.h2
-rw-r--r--arch/powerpc/include/asm/drmem.h43
-rw-r--r--arch/powerpc/include/asm/eeh.h9
-rw-r--r--arch/powerpc/include/asm/hvcall.h38
-rw-r--r--arch/powerpc/include/asm/hw_breakpoint.h12
-rw-r--r--arch/powerpc/include/asm/hw_irq.h11
-rw-r--r--arch/powerpc/include/asm/icswx.h6
-rw-r--r--arch/powerpc/include/asm/irq.h1
-rw-r--r--arch/powerpc/include/asm/machdep.h3
-rw-r--r--arch/powerpc/include/asm/mmu_context.h2
-rw-r--r--arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h14
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgtable.h20
-rw-r--r--arch/powerpc/include/asm/nohash/pgtable.h5
-rw-r--r--arch/powerpc/include/asm/pnv-ocxl.h3
-rw-r--r--arch/powerpc/include/asm/ppc_asm.h13
-rw-r--r--arch/powerpc/include/asm/processor.h9
-rw-r--r--arch/powerpc/include/asm/ptrace.h4
-rw-r--r--arch/powerpc/include/asm/reg.h20
-rw-r--r--arch/powerpc/include/asm/reg_booke.h1
-rw-r--r--arch/powerpc/include/asm/smp.h24
-rw-r--r--arch/powerpc/include/asm/svm.h4
-rw-r--r--arch/powerpc/include/asm/synch.h19
-rw-r--r--arch/powerpc/include/asm/time.h86
-rw-r--r--arch/powerpc/include/asm/timex.h3
-rw-r--r--arch/powerpc/include/asm/tlb.h13
-rw-r--r--arch/powerpc/include/asm/topology.h20
-rw-r--r--arch/powerpc/include/asm/uaccess.h75
-rw-r--r--arch/powerpc/include/uapi/asm/ptrace.h1
-rw-r--r--arch/powerpc/kernel/Makefile6
-rw-r--r--arch/powerpc/kernel/asm-offsets.c1
-rw-r--r--arch/powerpc/kernel/btext.c17
-rw-r--r--arch/powerpc/kernel/cputable.c16
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c1
-rw-r--r--arch/powerpc/kernel/eeh.c145
-rw-r--r--arch/powerpc/kernel/eeh_pe.c50
-rw-r--r--arch/powerpc/kernel/entry_32.S35
-rw-r--r--arch/powerpc/kernel/entry_64.S8
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S11
-rw-r--r--arch/powerpc/kernel/fadump.c2
-rw-r--r--arch/powerpc/kernel/fpu.S16
-rw-r--r--arch/powerpc/kernel/head_32.h73
-rw-r--r--arch/powerpc/kernel/head_40x.S1
-rw-r--r--arch/powerpc/kernel/head_64.S7
-rw-r--r--arch/powerpc/kernel/head_book3s_32.S (renamed from arch/powerpc/kernel/head_32.S)93
-rw-r--r--arch/powerpc/kernel/head_booke.h1
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c149
-rw-r--r--arch/powerpc/kernel/hw_breakpoint_constraints.c162
-rw-r--r--arch/powerpc/kernel/idle.c8
-rw-r--r--arch/powerpc/kernel/irq.c73
-rw-r--r--arch/powerpc/kernel/l2cr_6xx.S3
-rw-r--r--arch/powerpc/kernel/misc_32.S48
-rw-r--r--arch/powerpc/kernel/misc_64.S1
-rw-r--r--arch/powerpc/kernel/process.c149
-rw-r--r--arch/powerpc/kernel/prom.c5
-rw-r--r--arch/powerpc/kernel/prom_init.c17
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-noadv.c9
-rw-r--r--arch/powerpc/kernel/rtas.c153
-rw-r--r--arch/powerpc/kernel/security.c34
-rw-r--r--arch/powerpc/kernel/setup_32.c2
-rw-r--r--arch/powerpc/kernel/setup_64.c105
-rw-r--r--arch/powerpc/kernel/smp.c374
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/powerpc/kernel/sysfs.c49
-rw-r--r--arch/powerpc/kernel/tau_6xx.c147
-rw-r--r--arch/powerpc/kernel/time.c62
-rw-r--r--arch/powerpc/kernel/tm.S35
-rw-r--r--arch/powerpc/kernel/traps.c4
-rw-r--r--arch/powerpc/kernel/vdso32/datapage.S2
-rw-r--r--arch/powerpc/kernel/vdso32/vdso32.lds.S2
-rw-r--r--arch/powerpc/kvm/book3s_hv.c7
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S8
-rw-r--r--arch/powerpc/lib/code-patching.c17
-rw-r--r--arch/powerpc/lib/sstep.c9
-rw-r--r--arch/powerpc/mm/book3s32/hash_low.S21
-rw-r--r--arch/powerpc/mm/book3s32/mmu.c94
-rw-r--r--arch/powerpc/mm/book3s64/hash_native.c8
-rw-r--r--arch/powerpc/mm/book3s64/hash_utils.c12
-rw-r--r--arch/powerpc/mm/book3s64/internal.h2
-rw-r--r--arch/powerpc/mm/book3s64/mmu_context.c4
-rw-r--r--arch/powerpc/mm/book3s64/radix_pgtable.c10
-rw-r--r--arch/powerpc/mm/book3s64/radix_tlb.c35
-rw-r--r--arch/powerpc/mm/book3s64/slb.c4
-rw-r--r--arch/powerpc/mm/drmem.c6
-rw-r--r--arch/powerpc/mm/hugetlbpage.c20
-rw-r--r--arch/powerpc/mm/init_64.c39
-rw-r--r--arch/powerpc/mm/kasan/kasan_init_32.c31
-rw-r--r--arch/powerpc/mm/mem.c6
-rw-r--r--arch/powerpc/mm/nohash/8xx.c7
-rw-r--r--arch/powerpc/mm/nohash/fsl_booke.c16
-rw-r--r--arch/powerpc/mm/nohash/tlb.c4
-rw-r--r--arch/powerpc/mm/numa.c101
-rw-r--r--arch/powerpc/mm/pgtable.c11
-rw-r--r--arch/powerpc/mm/ptdump/8xx.c5
-rw-r--r--arch/powerpc/mm/ptdump/bats.c59
-rw-r--r--arch/powerpc/oprofile/cell/spu_task_sync.c2
-rw-r--r--arch/powerpc/perf/hv-gpci-requests.h6
-rw-r--r--arch/powerpc/perf/hv-gpci.c73
-rw-r--r--arch/powerpc/perf/hv-gpci.h27
-rw-r--r--arch/powerpc/perf/imc-pmu.c3
-rw-r--r--arch/powerpc/perf/isa207-common.c10
-rw-r--r--arch/powerpc/perf/isa207-common.h2
-rw-r--r--arch/powerpc/perf/power10-pmu.c1
-rw-r--r--arch/powerpc/perf/power5+-pmu.c2
-rw-r--r--arch/powerpc/perf/power5-pmu.c2
-rw-r--r--arch/powerpc/perf/power6-pmu.c2
-rw-r--r--arch/powerpc/perf/power7-pmu.c2
-rw-r--r--arch/powerpc/perf/ppc970-pmu.c2
-rw-r--r--arch/powerpc/platforms/44x/machine_check.c1
-rw-r--r--arch/powerpc/platforms/44x/ppc476.c5
-rw-r--r--arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c4
-rw-r--r--arch/powerpc/platforms/85xx/smp.c4
-rw-r--r--arch/powerpc/platforms/Kconfig29
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype18
-rw-r--r--arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c3
-rw-r--r--arch/powerpc/platforms/embedded6xx/storcenter.c3
-rw-r--r--arch/powerpc/platforms/powermac/pmac.h2
-rw-r--r--arch/powerpc/platforms/powermac/setup.c2
-rw-r--r--arch/powerpc/platforms/powermac/sleep.S15
-rw-r--r--arch/powerpc/platforms/powermac/smp.c12
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c98
-rw-r--r--arch/powerpc/platforms/powernv/idle.c302
-rw-r--r--arch/powerpc/platforms/powernv/memtrace.c2
-rw-r--r--arch/powerpc/platforms/powernv/ocxl.c30
-rw-r--r--arch/powerpc/platforms/powernv/opal-core.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-elog.c33
-rw-r--r--arch/powerpc/platforms/powernv/opal-msglog.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-prd.c2
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c8
-rw-r--r--arch/powerpc/platforms/powernv/powernv.h7
-rw-r--r--arch/powerpc/platforms/powernv/rng.c2
-rw-r--r--arch/powerpc/platforms/powernv/setup.c24
-rw-r--r--arch/powerpc/platforms/powernv/smp.c6
-rw-r--r--arch/powerpc/platforms/powernv/vas-window.c9
-rw-r--r--arch/powerpc/platforms/ps3/spu.c4
-rw-r--r--arch/powerpc/platforms/pseries/eeh_pseries.c376
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c6
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c82
-rw-r--r--arch/powerpc/platforms/pseries/hvCall_inst.c23
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c242
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c2
-rw-r--r--arch/powerpc/platforms/pseries/lparcfg.c35
-rw-r--r--arch/powerpc/platforms/pseries/papr_scm.c10
-rw-r--r--arch/powerpc/platforms/pseries/rng.c1
-rw-r--r--arch/powerpc/platforms/pseries/setup.c6
-rw-r--r--arch/powerpc/platforms/pseries/svm.c26
-rw-r--r--arch/powerpc/sysdev/xics/icp-hv.c1
-rw-r--r--arch/powerpc/sysdev/xive/common.c4
-rwxr-xr-xarch/powerpc/tools/checkpatch.sh1
-rwxr-xr-xarch/powerpc/tools/unrel_branch_check.sh125
-rw-r--r--arch/powerpc/xmon/xmon.c1
-rw-r--r--arch/riscv/Kconfig25
-rw-r--r--arch/riscv/Makefile1
-rw-r--r--arch/riscv/configs/defconfig1
-rw-r--r--arch/riscv/include/asm/Kbuild1
-rw-r--r--arch/riscv/include/asm/cacheinfo.h5
-rw-r--r--arch/riscv/include/asm/efi.h55
-rw-r--r--arch/riscv/include/asm/elf.h13
-rw-r--r--arch/riscv/include/asm/fixmap.h16
-rw-r--r--arch/riscv/include/asm/io.h1
-rw-r--r--arch/riscv/include/asm/mmu.h2
-rw-r--r--arch/riscv/include/asm/pgtable.h5
-rw-r--r--arch/riscv/include/asm/sections.h13
-rw-r--r--arch/riscv/include/uapi/asm/auxvec.h24
-rw-r--r--arch/riscv/kernel/Makefile2
-rw-r--r--arch/riscv/kernel/cacheinfo.c98
-rw-r--r--arch/riscv/kernel/efi-header.S111
-rw-r--r--arch/riscv/kernel/efi.c96
-rw-r--r--arch/riscv/kernel/head.S18
-rw-r--r--arch/riscv/kernel/head.h2
-rw-r--r--arch/riscv/kernel/image-vars.h51
-rw-r--r--arch/riscv/kernel/setup.c18
-rw-r--r--arch/riscv/kernel/vmlinux.lds.S23
-rw-r--r--arch/riscv/mm/fault.c356
-rw-r--r--arch/riscv/mm/init.c190
-rw-r--r--arch/riscv/mm/ptdump.c48
-rw-r--r--arch/s390/Kconfig5
-rw-r--r--arch/s390/Kconfig.debug12
-rw-r--r--arch/s390/boot/Makefile4
-rw-r--r--arch/s390/boot/compressed/Makefile4
-rw-r--r--arch/s390/boot/compressed/decompressor.c1
-rw-r--r--arch/s390/boot/compressed/vmlinux.lds.S22
-rw-r--r--arch/s390/boot/head.S21
-rw-r--r--arch/s390/boot/ipl_parm.c60
-rw-r--r--arch/s390/boot/kaslr.c138
-rw-r--r--arch/s390/boot/pgm_check_info.c11
-rw-r--r--arch/s390/boot/startup.c5
-rw-r--r--arch/s390/boot/text_dma.S17
-rw-r--r--arch/s390/boot/uv.c3
-rw-r--r--arch/s390/configs/debug_defconfig3
-rw-r--r--arch/s390/configs/defconfig3
-rw-r--r--arch/s390/include/asm/checksum.h105
-rw-r--r--arch/s390/include/asm/cio.h2
-rw-r--r--arch/s390/include/asm/clocksource.h7
-rw-r--r--arch/s390/include/asm/clp.h3
-rw-r--r--arch/s390/include/asm/gmap.h2
-rw-r--r--arch/s390/include/asm/io.h8
-rw-r--r--arch/s390/include/asm/ipl.h7
-rw-r--r--arch/s390/include/asm/kasan.h1
-rw-r--r--arch/s390/include/asm/pci.h6
-rw-r--r--arch/s390/include/asm/pci_clp.h19
-rw-r--r--arch/s390/include/asm/pgalloc.h2
-rw-r--r--arch/s390/include/asm/pgtable.h10
-rw-r--r--arch/s390/include/asm/ptdump.h14
-rw-r--r--arch/s390/include/asm/qdio.h10
-rw-r--r--arch/s390/include/asm/sclp.h5
-rw-r--r--arch/s390/include/asm/set_memory.h4
-rw-r--r--arch/s390/include/asm/setup.h7
-rw-r--r--arch/s390/include/asm/smp.h1
-rw-r--r--arch/s390/include/asm/stp.h100
-rw-r--r--arch/s390/include/asm/tlbflush.h2
-rw-r--r--arch/s390/include/asm/uaccess.h126
-rw-r--r--arch/s390/include/asm/uv.h7
-rw-r--r--arch/s390/include/asm/vdso.h27
-rw-r--r--arch/s390/include/asm/vdso/clocksource.h8
-rw-r--r--arch/s390/include/asm/vdso/data.h13
-rw-r--r--arch/s390/include/asm/vdso/gettimeofday.h71
-rw-r--r--arch/s390/include/asm/vdso/processor.h7
-rw-r--r--arch/s390/include/asm/vdso/vdso.h0
-rw-r--r--arch/s390/include/asm/vdso/vsyscall.h26
-rw-r--r--arch/s390/include/asm/vtimer.h2
-rw-r--r--arch/s390/include/uapi/asm/pkey.h77
-rw-r--r--arch/s390/include/uapi/asm/sie.h2
-rw-r--r--arch/s390/kernel/Makefile1
-rw-r--r--arch/s390/kernel/asm-offsets.c20
-rw-r--r--arch/s390/kernel/crash_dump.c16
-rw-r--r--arch/s390/kernel/diag.c13
-rw-r--r--arch/s390/kernel/dis.c22
-rw-r--r--arch/s390/kernel/early.c13
-rw-r--r--arch/s390/kernel/early_printk.c2
-rw-r--r--arch/s390/kernel/entry.S6
-rw-r--r--arch/s390/kernel/entry.h6
-rw-r--r--arch/s390/kernel/ipl.c119
-rw-r--r--arch/s390/kernel/kprobes.c59
-rw-r--r--arch/s390/kernel/kprobes_insn_page.S22
-rw-r--r--arch/s390/kernel/setup.c62
-rw-r--r--arch/s390/kernel/smp.c12
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/s390/kernel/time.c318
-rw-r--r--arch/s390/kernel/uv.c66
-rw-r--r--arch/s390/kernel/vdso.c29
-rw-r--r--arch/s390/kernel/vdso64/Makefile21
-rw-r--r--arch/s390/kernel/vdso64/clock_getres.S50
-rw-r--r--arch/s390/kernel/vdso64/clock_gettime.S163
-rw-r--r--arch/s390/kernel/vdso64/gettimeofday.S71
-rw-r--r--arch/s390/kernel/vdso64/vdso64_generic.c18
-rw-r--r--arch/s390/kernel/vdso64/vdso_user_wrapper.S38
-rw-r--r--arch/s390/lib/string.c2
-rw-r--r--arch/s390/mm/Makefile2
-rw-r--r--arch/s390/mm/dump_pagetables.c360
-rw-r--r--arch/s390/mm/gmap.c2
-rw-r--r--arch/s390/mm/init.c2
-rw-r--r--arch/s390/mm/kasan_init.c44
-rw-r--r--arch/s390/mm/pageattr.c2
-rw-r--r--arch/s390/mm/pgtable.c20
-rw-r--r--arch/s390/pci/Makefile1
-rw-r--r--arch/s390/pci/pci.c59
-rw-r--r--arch/s390/pci/pci_bus.c66
-rw-r--r--arch/s390/pci/pci_bus.h13
-rw-r--r--arch/s390/pci/pci_clp.c85
-rw-r--r--arch/s390/pci/pci_event.c3
-rw-r--r--arch/s390/pci/pci_iov.c99
-rw-r--r--arch/s390/pci/pci_iov.h30
-rw-r--r--arch/s390/scripts/Makefile.chkbss20
-rw-r--r--arch/sh/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/sparc/kernel/smp_64.c65
-rw-r--r--arch/sparc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/um/Kconfig6
-rw-r--r--arch/um/drivers/Kconfig6
-rw-r--r--arch/um/drivers/daemon_user.c1
-rw-r--r--arch/um/drivers/pcap_user.c12
-rw-r--r--arch/um/drivers/slip_user.c2
-rw-r--r--arch/um/drivers/vector_kern.c4
-rw-r--r--arch/um/drivers/vector_user.c71
-rw-r--r--arch/um/kernel/sigio.c6
-rw-r--r--arch/um/kernel/sysrq.c4
-rw-r--r--arch/um/kernel/time.c15
-rw-r--r--arch/um/os-Linux/umid.c8
-rw-r--r--arch/um/os-Linux/util.c2
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl1
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl1
-rw-r--r--arch/x86/kvm/mmu/page_track.c6
-rw-r--r--arch/x86/um/ptrace_64.c13
-rw-r--r--arch/x86/um/user-offsets.c2
-rw-r--r--arch/x86/xen/grant-table.c27
-rw-r--r--arch/xtensa/kernel/syscalls/syscall.tbl1
-rw-r--r--block/bio.c4
-rw-r--r--drivers/acpi/acpi_memhotplug.c3
-rw-r--r--drivers/base/core.c7
-rw-r--r--drivers/base/memory.c3
-rw-r--r--drivers/base/node.c33
-rw-r--r--drivers/block/xen-blkback/blkback.c22
-rw-r--r--drivers/block/xen-blkback/xenbus.c5
-rw-r--r--drivers/block/zram/zram_drv.c2
-rw-r--r--drivers/cpufreq/powernv-cpufreq.c9
-rw-r--r--drivers/cpuidle/cpuidle-powernv.c2
-rw-r--r--drivers/crypto/Kconfig24
-rw-r--r--drivers/dax/kmem.c50
-rw-r--r--drivers/dax/super.c3
-rw-r--r--drivers/firmware/broadcom/bcm47xx_sprom.c1
-rw-r--r--drivers/firmware/efi/Kconfig3
-rw-r--r--drivers/firmware/efi/Makefile2
-rw-r--r--drivers/firmware/efi/libstub/Makefile10
-rw-r--r--drivers/firmware/efi/libstub/efi-stub.c11
-rw-r--r--drivers/firmware/efi/libstub/riscv-stub.c109
-rw-r--r--drivers/firmware/efi/riscv-runtime.c143
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c10
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c7
-rw-r--r--drivers/gpu/drm/drm_prime.c25
-rw-r--r--drivers/gpu/drm/i915/Kconfig1
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi.c2
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.c17
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pages.c132
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_userptr.c12
-rw-r--r--drivers/gpu/drm/i915/gt/shmem_utils.c76
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c15
-rw-r--r--drivers/hv/hv_balloon.c2
-rw-r--r--drivers/i3c/master.c144
-rw-r--r--drivers/i3c/master/i3c-master-cdns.c4
-rw-r--r--drivers/infiniband/Kconfig1
-rw-r--r--drivers/infiniband/core/Makefile2
-rw-r--r--drivers/infiniband/core/addr.c11
-rw-r--r--drivers/infiniband/core/cache.c72
-rw-r--r--drivers/infiniband/core/cm.c126
-rw-r--r--drivers/infiniband/core/cm_trace.c15
-rw-r--r--drivers/infiniband/core/cm_trace.h414
-rw-r--r--drivers/infiniband/core/cma.c635
-rw-r--r--drivers/infiniband/core/cma_configfs.c9
-rw-r--r--drivers/infiniband/core/cma_trace.h40
-rw-r--r--drivers/infiniband/core/core_priv.h13
-rw-r--r--drivers/infiniband/core/counters.c15
-rw-r--r--drivers/infiniband/core/cq.c39
-rw-r--r--drivers/infiniband/core/device.c77
-rw-r--r--drivers/infiniband/core/rdma_core.c34
-rw-r--r--drivers/infiniband/core/restrack.c161
-rw-r--r--drivers/infiniband/core/restrack.h10
-rw-r--r--drivers/infiniband/core/sysfs.c15
-rw-r--r--drivers/infiniband/core/ucma.c542
-rw-r--r--drivers/infiniband/core/umem.c139
-rw-r--r--drivers/infiniband/core/umem_odp.c291
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c93
-rw-r--r--drivers/infiniband/core/uverbs_main.c10
-rw-r--r--drivers/infiniband/core/uverbs_std_types.c15
-rw-r--r--drivers/infiniband/core/uverbs_std_types_counters.c4
-rw-r--r--drivers/infiniband/core/uverbs_std_types_cq.c8
-rw-r--r--drivers/infiniband/core/uverbs_std_types_device.c199
-rw-r--r--drivers/infiniband/core/uverbs_std_types_wq.c2
-rw-r--r--drivers/infiniband/core/verbs.c114
-rw-r--r--drivers/infiniband/hw/bnxt_re/bnxt_re.h2
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c90
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.h8
-rw-r--r--drivers/infiniband/hw/bnxt_re/main.c3
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.c7
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.c11
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_res.c30
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_res.h3
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c4
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c3
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h7
-rw-r--r--drivers/infiniband/hw/cxgb4/mem.c40
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c11
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c3
-rw-r--r--drivers/infiniband/hw/efa/efa.h14
-rw-r--r--drivers/infiniband/hw/efa/efa_admin_cmds_defs.h69
-rw-r--r--drivers/infiniband/hw/efa/efa_com_cmd.c28
-rw-r--r--drivers/infiniband/hw/efa/efa_com_cmd.h18
-rw-r--r--drivers/infiniband/hw/efa/efa_main.c4
-rw-r--r--drivers/infiniband/hw/efa/efa_verbs.c258
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c22
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_ah.c23
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_alloc.c3
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cq.c27
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h74
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.c8
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.c51
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.h4
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c534
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h43
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c19
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c81
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_pd.c3
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c80
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_srq.c5
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw.h9
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.c10
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_hw.c4
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_main.c16
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_pble.c4
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_type.h3
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_utils.c63
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c64
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.h3
-rw-r--r--drivers/infiniband/hw/mlx4/ah.c5
-rw-r--r--drivers/infiniband/hw/mlx4/cm.c152
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c4
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c158
-rw-r--r--drivers/infiniband/hw/mlx4/main.c45
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h62
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c35
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c345
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c8
-rw-r--r--drivers/infiniband/hw/mlx5/ah.c9
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c8
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h4
-rw-r--r--drivers/infiniband/hw/mlx5/counters.c7
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c16
-rw-r--r--drivers/infiniband/hw/mlx5/fs.c148
-rw-r--r--drivers/infiniband/hw/mlx5/gsi.c154
-rw-r--r--drivers/infiniband/hw/mlx5/main.c70
-rw-r--r--drivers/infiniband/hw/mlx5/mem.c4
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h100
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c189
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c56
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c182
-rw-r--r--drivers/infiniband/hw/mlx5/qp.h4
-rw-r--r--drivers/infiniband/hw/mlx5/qpc.c5
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c23
-rw-r--r--drivers/infiniband/hw/mlx5/srq.h2
-rw-r--r--drivers/infiniband/hw/mlx5/srq_cmd.c22
-rw-r--r--drivers/infiniband/hw/mlx5/wr.c27
-rw-r--r--drivers/infiniband/hw/mthca/mthca_dev.h2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c39
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.h27
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c75
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma.h1
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.c3
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.h2
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c5
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c4
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c38
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.h6
-rw-r--r--drivers/infiniband/hw/qedr/main.c31
-rw-r--r--drivers/infiniband/hw/qedr/qedr.h33
-rw-r--r--drivers/infiniband/hw/qedr/qedr_iw_cm.c6
-rw-r--r--drivers/infiniband/hw/qedr/verbs.c438
-rw-r--r--drivers/infiniband/hw/qedr/verbs.h11
-rw-r--r--drivers/infiniband/hw/qib/qib.h6
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c7
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.c52
-rw-r--r--drivers/infiniband/hw/qib/qib_sdma.c10
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_main.c5
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.c18
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.h6
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c7
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c4
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c9
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c3
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c9
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c7
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c15
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h10
-rw-r--r--drivers/infiniband/sw/rdmavt/ah.c3
-rw-r--r--drivers/infiniband/sw/rdmavt/ah.h2
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.c3
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.h2
-rw-r--r--drivers/infiniband/sw/rdmavt/pd.c3
-rw-r--r--drivers/infiniband/sw/rdmavt/pd.h2
-rw-r--r--drivers/infiniband/sw/rdmavt/srq.c3
-rw-r--r--drivers/infiniband/sw/rdmavt/srq.h2
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.c10
-rw-r--r--drivers/infiniband/sw/rxe/rxe.c43
-rw-r--r--drivers/infiniband/sw/rxe/rxe.h29
-rw-r--r--drivers/infiniband/sw/rxe/rxe_av.c29
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c32
-rw-r--r--drivers/infiniband/sw/rxe/rxe_cq.c35
-rw-r--r--drivers/infiniband/sw/rxe/rxe_hdr.h29
-rw-r--r--drivers/infiniband/sw/rxe/rxe_hw_counters.c29
-rw-r--r--drivers/infiniband/sw/rxe/rxe_hw_counters.h29
-rw-r--r--drivers/infiniband/sw/rxe/rxe_icrc.c29
-rw-r--r--drivers/infiniband/sw/rxe/rxe_loc.h29
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mcast.c29
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mmap.c29
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mr.c54
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c39
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.h29
-rw-r--r--drivers/infiniband/sw/rxe/rxe_opcode.c29
-rw-r--r--drivers/infiniband/sw/rxe/rxe_opcode.h29
-rw-r--r--drivers/infiniband/sw/rxe/rxe_param.h29
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.c89
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.h36
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c32
-rw-r--r--drivers/infiniband/sw/rxe/rxe_queue.c29
-rw-r--r--drivers/infiniband/sw/rxe/rxe_queue.h29
-rw-r--r--drivers/infiniband/sw/rxe/rxe_recv.c68
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c33
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c29
-rw-r--r--drivers/infiniband/sw/rxe/rxe_srq.c29
-rw-r--r--drivers/infiniband/sw/rxe/rxe_sysfs.c35
-rw-r--r--drivers/infiniband/sw/rxe/rxe_task.c37
-rw-r--r--drivers/infiniband/sw/rxe/rxe_task.h33
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c52
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.h48
-rw-r--r--drivers/infiniband/sw/siw/siw_main.c8
-rw-r--r--drivers/infiniband/sw/siw/siw_verbs.c9
-rw-r--r--drivers/infiniband/sw/siw/siw_verbs.h6
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c6
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_fs.c50
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_netlink.c11
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_vlan.c2
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c15
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c6
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-pri.h1
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c4
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv.c76
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv.h7
-rw-r--r--drivers/iommu/intel/dmar.c2
-rw-r--r--drivers/macintosh/smu.c4
-rw-r--r--drivers/macintosh/windfarm_lm75_sensor.c2
-rw-r--r--drivers/macintosh/windfarm_lm87_sensor.c2
-rw-r--r--drivers/macintosh/windfarm_smu_sat.c3
-rw-r--r--drivers/macintosh/windfarm_smu_sensors.c3
-rw-r--r--drivers/mailbox/Makefile2
-rw-r--r--drivers/mailbox/arm_mhu.c3
-rw-r--r--drivers/mailbox/arm_mhu_db.c354
-rw-r--r--drivers/mailbox/bcm-pdc-mailbox.c6
-rw-r--r--drivers/mailbox/mailbox.c12
-rw-r--r--drivers/mailbox/mtk-cmdq-mailbox.c8
-rw-r--r--drivers/misc/cxl/pci.c4
-rw-r--r--drivers/misc/ocxl/Kconfig2
-rw-r--r--drivers/misc/ocxl/afu_irq.c12
-rw-r--r--drivers/misc/ocxl/link.c15
-rw-r--r--drivers/mtd/devices/Kconfig2
-rw-r--r--drivers/mtd/devices/lart.c10
-rw-r--r--drivers/mtd/devices/spear_smi.c4
-rw-r--r--drivers/mtd/hyperbus/Kconfig7
-rw-r--r--drivers/mtd/hyperbus/Makefile1
-rw-r--r--drivers/mtd/hyperbus/hbmc-am654.c144
-rw-r--r--drivers/mtd/hyperbus/rpc-if.c170
-rw-r--r--drivers/mtd/lpddr/lpddr2_nvm.c35
-rw-r--r--drivers/mtd/lpddr/lpddr_cmds.c28
-rw-r--r--drivers/mtd/maps/Kconfig11
-rw-r--r--drivers/mtd/maps/Makefile1
-rw-r--r--drivers/mtd/maps/physmap-bt1-rom.c126
-rw-r--r--drivers/mtd/maps/physmap-bt1-rom.h17
-rw-r--r--drivers/mtd/maps/physmap-core.c8
-rw-r--r--drivers/mtd/maps/vmu-flash.c11
-rw-r--r--drivers/mtd/mtdconcat.c43
-rw-r--r--drivers/mtd/mtdcore.c28
-rw-r--r--drivers/mtd/mtdoops.c11
-rw-r--r--drivers/mtd/nand/Kconfig8
-rw-r--r--drivers/mtd/nand/Makefile2
-rw-r--r--drivers/mtd/nand/ecc.c484
-rw-r--r--drivers/mtd/nand/onenand/onenand_base.c9
-rw-r--r--drivers/mtd/nand/onenand/onenand_omap2.c5
-rw-r--r--drivers/mtd/nand/raw/Kconfig1
-rw-r--r--drivers/mtd/nand/raw/ams-delta.c6
-rw-r--r--drivers/mtd/nand/raw/arasan-nand-controller.c16
-rw-r--r--drivers/mtd/nand/raw/atmel/nand-controller.c457
-rw-r--r--drivers/mtd/nand/raw/au1550nd.c4
-rw-r--r--drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c3
-rw-r--r--drivers/mtd/nand/raw/brcmnand/brcmnand.c28
-rw-r--r--drivers/mtd/nand/raw/cadence-nand-controller.c12
-rw-r--r--drivers/mtd/nand/raw/cafe_nand.c3
-rw-r--r--drivers/mtd/nand/raw/cs553x_nand.c2
-rw-r--r--drivers/mtd/nand/raw/davinci_nand.c38
-rw-r--r--drivers/mtd/nand/raw/denali.c3
-rw-r--r--drivers/mtd/nand/raw/denali_pci.c2
-rw-r--r--drivers/mtd/nand/raw/diskonchip.c3
-rw-r--r--drivers/mtd/nand/raw/fsl_elbc_nand.c20
-rw-r--r--drivers/mtd/nand/raw/fsl_ifc_nand.c12
-rw-r--r--drivers/mtd/nand/raw/fsl_upm.c4
-rw-r--r--drivers/mtd/nand/raw/fsmc_nand.c14
-rw-r--r--drivers/mtd/nand/raw/gpio.c4
-rw-r--r--drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c21
-rw-r--r--drivers/mtd/nand/raw/hisi504_nand.c6
-rw-r--r--drivers/mtd/nand/raw/ingenic/ingenic_nand_drv.c20
-rw-r--r--drivers/mtd/nand/raw/lpc32xx_mlc.c2
-rw-r--r--drivers/mtd/nand/raw/lpc32xx_slc.c3
-rw-r--r--drivers/mtd/nand/raw/marvell_nand.c101
-rw-r--r--drivers/mtd/nand/raw/meson_nand.c2
-rw-r--r--drivers/mtd/nand/raw/mpc5121_nfc.c4
-rw-r--r--drivers/mtd/nand/raw/mtk_nand.c12
-rw-r--r--drivers/mtd/nand/raw/mxc_nand.c25
-rw-r--r--drivers/mtd/nand/raw/nand_base.c554
-rw-r--r--drivers/mtd/nand/raw/nand_bch.c1
-rw-r--r--drivers/mtd/nand/raw/nand_esmt.c15
-rw-r--r--drivers/mtd/nand/raw/nand_hynix.c44
-rw-r--r--drivers/mtd/nand/raw/nand_jedec.c9
-rw-r--r--drivers/mtd/nand/raw/nand_micron.c23
-rw-r--r--drivers/mtd/nand/raw/nand_onfi.c17
-rw-r--r--drivers/mtd/nand/raw/nand_samsung.c22
-rw-r--r--drivers/mtd/nand/raw/nand_toshiba.c19
-rw-r--r--drivers/mtd/nand/raw/nandsim.c8
-rw-r--r--drivers/mtd/nand/raw/ndfc.c2
-rw-r--r--drivers/mtd/nand/raw/omap2.c22
-rw-r--r--drivers/mtd/nand/raw/orion_nand.c4
-rw-r--r--drivers/mtd/nand/raw/pasemi_nand.c6
-rw-r--r--drivers/mtd/nand/raw/plat_nand.c4
-rw-r--r--drivers/mtd/nand/raw/qcom_nandc.c26
-rw-r--r--drivers/mtd/nand/raw/r852.c3
-rw-r--r--drivers/mtd/nand/raw/s3c2410.c20
-rw-r--r--drivers/mtd/nand/raw/sh_flctl.c6
-rw-r--r--drivers/mtd/nand/raw/sharpsl.c2
-rw-r--r--drivers/mtd/nand/raw/socrates_nand.c5
-rw-r--r--drivers/mtd/nand/raw/stm32_fmc2_nand.c11
-rw-r--r--drivers/mtd/nand/raw/sunxi_nand.c27
-rw-r--r--drivers/mtd/nand/raw/tango_nand.c4
-rw-r--r--drivers/mtd/nand/raw/tegra_nand.c37
-rw-r--r--drivers/mtd/nand/raw/tmio_nand.c2
-rw-r--r--drivers/mtd/nand/raw/txx9ndfmc.c2
-rw-r--r--drivers/mtd/nand/raw/vf610_nfc.c17
-rw-r--r--drivers/mtd/nand/raw/xway_nand.c4
-rw-r--r--drivers/mtd/nand/spi/core.c12
-rw-r--r--drivers/mtd/nand/spi/gigadevice.c63
-rw-r--r--drivers/mtd/nand/spi/macronix.c27
-rw-r--r--drivers/mtd/nand/spi/toshiba.c6
-rw-r--r--drivers/mtd/parsers/Kconfig2
-rw-r--r--drivers/mtd/spi-nor/controllers/intel-spi-pci.c1
-rw-r--r--drivers/mtd/spi-nor/macronix.c2
-rw-r--r--drivers/mtd/spi-nor/winbond.c9
-rw-r--r--drivers/mtd/ubi/wl.c13
-rw-r--r--drivers/net/appletalk/Kconfig2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c31
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/port.c23
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_rdma.c15
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_ethtool.c4
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_rdma.c17
-rw-r--r--drivers/net/hamradio/scc.c2
-rw-r--r--drivers/net/xen-netback/common.h15
-rw-r--r--drivers/net/xen-netback/interface.c61
-rw-r--r--drivers/net/xen-netback/netback.c11
-rw-r--r--drivers/net/xen-netback/rx.c13
-rw-r--r--drivers/power/reset/Kconfig4
-rw-r--r--drivers/power/reset/ocelot-reset.c55
-rw-r--r--drivers/power/supply/Kconfig36
-rw-r--r--drivers/power/supply/Makefile2
-rw-r--r--drivers/power/supply/ab8500_fg.c4
-rw-r--r--drivers/power/supply/bq24257_charger.c2
-rw-r--r--drivers/power/supply/bq2515x_charger.c14
-rw-r--r--drivers/power/supply/bq25890_charger.c17
-rw-r--r--drivers/power/supply/bq25980_charger.c1314
-rw-r--r--drivers/power/supply/bq25980_charger.h178
-rw-r--r--drivers/power/supply/bq27xxx_battery.c102
-rw-r--r--drivers/power/supply/bq27xxx_battery_hdq.c9
-rw-r--r--drivers/power/supply/bq27xxx_battery_i2c.c12
-rw-r--r--drivers/power/supply/charger-manager.c578
-rw-r--r--drivers/power/supply/cpcap-battery.c7
-rw-r--r--drivers/power/supply/ds2780_battery.c6
-rw-r--r--drivers/power/supply/ds2781_battery.c6
-rw-r--r--drivers/power/supply/goldfish_battery.c2
-rw-r--r--drivers/power/supply/gpio-charger.c172
-rw-r--r--drivers/power/supply/ingenic-battery.c8
-rw-r--r--drivers/power/supply/lego_ev3_battery.c24
-rw-r--r--drivers/power/supply/ltc2941-battery-gauge.c3
-rw-r--r--drivers/power/supply/max17040_battery.c489
-rw-r--r--drivers/power/supply/pm2301_charger.c7
-rw-r--r--drivers/power/supply/power_supply_core.c19
-rw-r--r--drivers/power/supply/power_supply_sysfs.c1
-rw-r--r--drivers/power/supply/rn5t618_power.c556
-rw-r--r--drivers/power/supply/rt9455_charger.c2
-rw-r--r--drivers/power/supply/sbs-battery.c125
-rw-r--r--drivers/power/supply/smb347-charger.c692
-rw-r--r--drivers/power/supply/test_power.c26
-rw-r--r--drivers/power/supply/ucs1002_power.c75
-rw-r--r--drivers/rapidio/devices/rio_mport_cdev.c18
-rw-r--r--drivers/s390/char/Makefile2
-rw-r--r--drivers/s390/char/con3215.c7
-rw-r--r--drivers/s390/char/raw3270.h1
-rw-r--r--drivers/s390/char/sclp.h4
-rw-r--r--drivers/s390/char/sclp_ap.c63
-rw-r--r--drivers/s390/char/sclp_cmd.c2
-rw-r--r--drivers/s390/char/sclp_early_core.c15
-rw-r--r--drivers/s390/char/sclp_rw.c18
-rw-r--r--drivers/s390/char/sclp_rw.h2
-rw-r--r--drivers/s390/char/sclp_sdias.c8
-rw-r--r--drivers/s390/char/tape.h3
-rw-r--r--drivers/s390/char/tape_std.h12
-rw-r--r--drivers/s390/char/zcore.c17
-rw-r--r--drivers/s390/cio/chsc.c21
-rw-r--r--drivers/s390/cio/css.c14
-rw-r--r--drivers/s390/cio/device.h1
-rw-r--r--drivers/s390/cio/qdio_main.c43
-rw-r--r--drivers/s390/cio/qdio_setup.c38
-rw-r--r--drivers/s390/crypto/ap_bus.c411
-rw-r--r--drivers/s390/crypto/ap_bus.h54
-rw-r--r--drivers/s390/crypto/ap_card.c34
-rw-r--r--drivers/s390/crypto/ap_debug.h8
-rw-r--r--drivers/s390/crypto/ap_queue.c252
-rw-r--r--drivers/s390/crypto/pkey_api.c262
-rw-r--r--drivers/s390/crypto/zcrypt_api.c416
-rw-r--r--drivers/s390/crypto/zcrypt_api.h49
-rw-r--r--drivers/s390/crypto/zcrypt_card.c12
-rw-r--r--drivers/s390/crypto/zcrypt_ccamisc.c411
-rw-r--r--drivers/s390/crypto/zcrypt_ccamisc.h74
-rw-r--r--drivers/s390/crypto/zcrypt_cex2a.c6
-rw-r--r--drivers/s390/crypto/zcrypt_cex2c.c45
-rw-r--r--drivers/s390/crypto/zcrypt_cex4.c97
-rw-r--r--drivers/s390/crypto/zcrypt_debug.h8
-rw-r--r--drivers/s390/crypto/zcrypt_ep11misc.c312
-rw-r--r--drivers/s390/crypto/zcrypt_ep11misc.h63
-rw-r--r--drivers/s390/crypto/zcrypt_error.h88
-rw-r--r--drivers/s390/crypto/zcrypt_msgtype50.c131
-rw-r--r--drivers/s390/crypto/zcrypt_msgtype6.c264
-rw-r--r--drivers/s390/crypto/zcrypt_msgtype6.h4
-rw-r--r--drivers/s390/crypto/zcrypt_queue.c11
-rw-r--r--drivers/s390/scsi/zfcp_qdio.c2
-rw-r--r--drivers/scsi/cxlflash/ocxl_hw.c21
-rw-r--r--drivers/scsi/cxlflash/ocxl_hw.h1
-rw-r--r--drivers/thermal/Kconfig6
-rw-r--r--drivers/thermal/cpufreq_cooling.c8
-rw-r--r--drivers/thermal/cpuidle_cooling.c2
-rw-r--r--drivers/thermal/devfreq_cooling.c3
-rw-r--r--drivers/thermal/gov_power_allocator.c6
-rw-r--r--drivers/thermal/imx8mm_thermal.c10
-rw-r--r--drivers/thermal/imx_thermal.c22
-rw-r--r--drivers/thermal/intel/int340x_thermal/int3400_thermal.c51
-rw-r--r--drivers/thermal/rcar_thermal.c4
-rw-r--r--drivers/thermal/st/Kconfig2
-rw-r--r--drivers/thermal/st/stm_thermal.c7
-rw-r--r--drivers/thermal/sun8i_thermal.c16
-rw-r--r--drivers/thermal/thermal_core.c13
-rw-r--r--drivers/thermal/thermal_core.h4
-rw-r--r--drivers/thermal/thermal_netlink.c3
-rw-r--r--drivers/thermal/thermal_sysfs.c2
-rw-r--r--drivers/thermal/ti-soc-thermal/ti-bandgap.c54
-rw-r--r--drivers/thermal/ti-soc-thermal/ti-bandgap.h6
-rw-r--r--drivers/tty/serial/sb1250-duart.c9
-rw-r--r--drivers/vfio/pci/vfio_pci.c38
-rw-r--r--drivers/virtio/virtio_mem.c3
-rw-r--r--drivers/xen/balloon.c2
-rw-r--r--drivers/xen/events/events_2l.c9
-rw-r--r--drivers/xen/events/events_base.c423
-rw-r--r--drivers/xen/events/events_fifo.c83
-rw-r--r--drivers/xen/events/events_internal.h20
-rw-r--r--drivers/xen/evtchn.c7
-rw-r--r--drivers/xen/pvcalls-back.c76
-rw-r--r--drivers/xen/xen-pciback/pci_stub.c13
-rw-r--r--drivers/xen/xen-pciback/pciback.h12
-rw-r--r--drivers/xen/xen-pciback/pciback_ops.c48
-rw-r--r--drivers/xen/xen-pciback/xenbus.c2
-rw-r--r--drivers/xen/xen-scsiback.c23
-rw-r--r--drivers/xen/xenbus/xenbus_client.c30
-rw-r--r--fs/afs/cell.c328
-rw-r--r--fs/afs/dynroot.c23
-rw-r--r--fs/afs/internal.h20
-rw-r--r--fs/afs/main.c2
-rw-r--r--fs/afs/mntpt.c4
-rw-r--r--fs/afs/proc.c23
-rw-r--r--fs/afs/server.c7
-rw-r--r--fs/afs/super.c18
-rw-r--r--fs/afs/vl_alias.c8
-rw-r--r--fs/afs/vl_rotate.c2
-rw-r--r--fs/afs/volume.c6
-rw-r--r--fs/autofs/dev-ioctl.c8
-rw-r--r--fs/binfmt_elf.c266
-rw-r--r--fs/binfmt_elf_fdpic.c162
-rw-r--r--fs/buffer.c6
-rw-r--r--fs/configfs/dir.c2
-rw-r--r--fs/configfs/file.c2
-rw-r--r--fs/coredump.c236
-rw-r--r--fs/dax.c29
-rw-r--r--fs/exec.c17
-rw-r--r--fs/ext4/verity.c4
-rw-r--r--fs/f2fs/acl.c6
-rw-r--r--fs/f2fs/checkpoint.c17
-rw-r--r--fs/f2fs/compress.c242
-rw-r--r--fs/f2fs/data.c119
-rw-r--r--fs/f2fs/debug.c18
-rw-r--r--fs/f2fs/dir.c109
-rw-r--r--fs/f2fs/extent_cache.c37
-rw-r--r--fs/f2fs/f2fs.h118
-rw-r--r--fs/f2fs/file.c88
-rw-r--r--fs/f2fs/gc.c413
-rw-r--r--fs/f2fs/gc.h69
-rw-r--r--fs/f2fs/inline.c4
-rw-r--r--fs/f2fs/inode.c21
-rw-r--r--fs/f2fs/namei.c2
-rw-r--r--fs/f2fs/node.c7
-rw-r--r--fs/f2fs/segment.c522
-rw-r--r--fs/f2fs/segment.h71
-rw-r--r--fs/f2fs/super.c168
-rw-r--r--fs/f2fs/sysfs.c22
-rw-r--r--fs/f2fs/verity.c4
-rw-r--r--fs/f2fs/xattr.c8
-rw-r--r--fs/fuse/Kconfig16
-rw-r--r--fs/fuse/Makefile6
-rw-r--r--fs/fuse/control.c20
-rw-r--r--fs/fuse/cuse.c21
-rw-r--r--fs/fuse/dax.c1365
-rw-r--r--fs/fuse/dev.c189
-rw-r--r--fs/fuse/dir.c220
-rw-r--r--fs/fuse/file.c255
-rw-r--r--fs/fuse/fuse_i.h185
-rw-r--r--fs/fuse/inode.c391
-rw-r--r--fs/fuse/readdir.c10
-rw-r--r--fs/fuse/virtio_fs.c378
-rw-r--r--fs/fuse/xattr.c34
-rw-r--r--fs/inode.c2
-rw-r--r--fs/io-wq.c51
-rw-r--r--fs/io-wq.h18
-rw-r--r--fs/io_uring.c650
-rw-r--r--fs/libfs.c87
-rw-r--r--fs/lockd/mon.c2
-rw-r--r--fs/nfs/fs_context.c1
-rw-r--r--fs/nfs/namespace.c12
-rw-r--r--fs/nfs/nfs42xattr.c5
-rw-r--r--fs/nfs/nfs42xdr.c167
-rw-r--r--fs/nfs/nfs4_fs.h8
-rw-r--r--fs/nfs/nfs4client.c2
-rw-r--r--fs/nfs/nfs4file.c3
-rw-r--r--fs/nfs/nfs4idmap.c15
-rw-r--r--fs/nfs/nfs4proc.c272
-rw-r--r--fs/nfs/nfs4trace.h1
-rw-r--r--fs/nfs/nfs4xdr.c7
-rw-r--r--fs/nfs/pnfs.c2
-rw-r--r--fs/nfs/super.c2
-rw-r--r--fs/nfs/sysfs.c11
-rw-r--r--fs/nfs/sysfs.h2
-rw-r--r--fs/nilfs2/bmap.c2
-rw-r--r--fs/nilfs2/cpfile.c6
-rw-r--r--fs/nilfs2/page.c1
-rw-r--r--fs/nilfs2/sufile.c4
-rw-r--r--fs/notify/fanotify/fanotify.c5
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c5
-rw-r--r--fs/overlayfs/copy_up.c59
-rw-r--r--fs/overlayfs/dir.c2
-rw-r--r--fs/overlayfs/export.c2
-rw-r--r--fs/overlayfs/file.c88
-rw-r--r--fs/overlayfs/inode.c32
-rw-r--r--fs/overlayfs/namei.c57
-rw-r--r--fs/overlayfs/overlayfs.h92
-rw-r--r--fs/overlayfs/ovl_entry.h6
-rw-r--r--fs/overlayfs/readdir.c76
-rw-r--r--fs/overlayfs/super.c117
-rw-r--r--fs/overlayfs/util.c96
-rw-r--r--fs/proc/base.c4
-rw-r--r--fs/proc/task_mmu.c18
-rw-r--r--fs/ramfs/file-nommu.c2
-rw-r--r--fs/romfs/super.c1
-rw-r--r--fs/ubifs/auth.c2
-rw-r--r--fs/ubifs/debug.c1
-rw-r--r--fs/ubifs/gc.c4
-rw-r--r--fs/ubifs/ioctl.c1
-rw-r--r--fs/ubifs/journal.c7
-rw-r--r--fs/ubifs/orphan.c2
-rw-r--r--fs/ubifs/replay.c2
-rw-r--r--fs/ubifs/super.c44
-rw-r--r--fs/ubifs/tnc.c6
-rw-r--r--fs/ubifs/xattr.c2
-rw-r--r--fs/unicode/utf8-core.c23
-rw-r--r--fs/userfaultfd.c28
-rw-r--r--fs/xfs/Kconfig25
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c2
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c19
-rw-r--r--fs/xfs/libxfs/xfs_da_format.h18
-rw-r--r--fs/xfs/libxfs/xfs_defer.c232
-rw-r--r--fs/xfs/libxfs/xfs_defer.h37
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.h2
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c27
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c11
-rw-r--r--fs/xfs/scrub/dabtree.c14
-rw-r--r--fs/xfs/xfs_bmap_item.c132
-rw-r--r--fs/xfs/xfs_buf_item_recover.c2
-rw-r--r--fs/xfs/xfs_dquot.c4
-rw-r--r--fs/xfs/xfs_extfree_item.c44
-rw-r--r--fs/xfs/xfs_filestream.c34
-rw-r--r--fs/xfs/xfs_fsmap.c48
-rw-r--r--fs/xfs/xfs_fsmap.h6
-rw-r--r--fs/xfs/xfs_inode.c123
-rw-r--r--fs/xfs/xfs_ioctl.c144
-rw-r--r--fs/xfs/xfs_iops.c4
-rw-r--r--fs/xfs/xfs_linux.h1
-rw-r--r--fs/xfs/xfs_log.c44
-rw-r--r--fs/xfs/xfs_log.h2
-rw-r--r--fs/xfs/xfs_log_recover.c221
-rw-r--r--fs/xfs/xfs_qm.c16
-rw-r--r--fs/xfs/xfs_refcount_item.c51
-rw-r--r--fs/xfs/xfs_rmap_item.c42
-rw-r--r--fs/xfs/xfs_rtalloc.c31
-rw-r--r--fs/xfs/xfs_stats.c4
-rw-r--r--fs/xfs/xfs_stats.h1
-rw-r--r--fs/xfs/xfs_super.c44
-rw-r--r--fs/xfs/xfs_sysctl.c36
-rw-r--r--fs/xfs/xfs_trace.h1
-rw-r--r--fs/xfs/xfs_trans.c2
-rw-r--r--fs/xfs/xfs_trans.h33
-rw-r--r--fs/xfs/xfs_trans_dquot.c43
-rw-r--r--fs/zonefs/super.c221
-rw-r--r--fs/zonefs/zonefs.h10
-rw-r--r--include/asm-generic/vmlinux.lds.h10
-rw-r--r--include/dt-bindings/power/summit,smb347-charger.h19
-rw-r--r--include/kunit/test.h90
-rw-r--r--include/linux/bcm47xx_sprom.h10
-rw-r--r--include/linux/bcm963xx_tag.h2
-rw-r--r--include/linux/bitops.h13
-rw-r--r--include/linux/blkdev.h1
-rw-r--r--include/linux/bvec.h6
-rw-r--r--include/linux/coredump.h11
-rw-r--r--include/linux/cpuhotplug.h1
-rw-r--r--include/linux/dax.h6
-rw-r--r--include/linux/devfreq.h7
-rw-r--r--include/linux/f2fs_fs.h3
-rw-r--r--include/linux/fault-inject-usercopy.h22
-rw-r--r--include/linux/fs.h44
-rw-r--r--include/linux/idle_inject.h2
-rw-r--r--include/linux/idr.h13
-rw-r--r--include/linux/input/sparse-keymap.h1
-rw-r--r--include/linux/io_uring.h27
-rw-r--r--include/linux/ioport.h11
-rw-r--r--include/linux/jiffies.h3
-rw-r--r--include/linux/kernel.h150
-rw-r--r--include/linux/kgdb.h18
-rw-r--r--include/linux/list.h29
-rw-r--r--include/linux/math64.h8
-rw-r--r--include/linux/memcontrol.h12
-rw-r--r--include/linux/memory_hotplug.h42
-rw-r--r--include/linux/minmax.h153
-rw-r--r--include/linux/mlx5/mlx5_ifc.h6
-rw-r--r--include/linux/mlx5/port.h15
-rw-r--r--include/linux/mm.h7
-rw-r--r--include/linux/mmzone.h17
-rw-r--r--include/linux/mtd/hyperbus.h13
-rw-r--r--include/linux/mtd/nand.h188
-rw-r--r--include/linux/mtd/pfow.h33
-rw-r--r--include/linux/mtd/rawnand.h34
-rw-r--r--include/linux/nfs4.h2
-rw-r--r--include/linux/nfs_fs_sb.h1
-rw-r--r--include/linux/nfs_xdr.h8
-rw-r--r--include/linux/node.h16
-rw-r--r--include/linux/nodemask.h2
-rw-r--r--include/linux/overflow.h1
-rw-r--r--include/linux/page-flags.h6
-rw-r--r--include/linux/page_owner.h6
-rw-r--r--include/linux/pagemap.h107
-rw-r--r--include/linux/pid.h1
-rw-r--r--include/linux/platform_data/mtd-davinci.h9
-rw-r--r--include/linux/platform_data/mtd-nand-s3c2410.h2
-rw-r--r--include/linux/power/bq27xxx_battery.h1
-rw-r--r--include/linux/power/charger-manager.h41
-rw-r--r--include/linux/power/gpio-charger.h6
-rw-r--r--include/linux/power/smb347-charger.h114
-rw-r--r--include/linux/power_supply.h7
-rw-r--r--include/linux/qed/qed_rdma_if.h2
-rw-r--r--include/linux/qed/qede_rdma.h4
-rw-r--r--include/linux/radix-tree.h3
-rw-r--r--include/linux/rculist.h48
-rw-r--r--include/linux/rcupdate.h19
-rw-r--r--include/linux/rcutiny.h1
-rw-r--r--include/linux/rcutree.h1
-rw-r--r--include/linux/scatterlist.h38
-rw-r--r--include/linux/sched.h2
-rw-r--r--include/linux/sched/mm.h64
-rw-r--r--include/linux/smp.h3
-rw-r--r--include/linux/smp_types.h3
-rw-r--r--include/linux/sunrpc/bc_xprt.h2
-rw-r--r--include/linux/sunrpc/cache.h3
-rw-r--r--include/linux/sunrpc/msg_prot.h2
-rw-r--r--include/linux/sunrpc/xdr.h3
-rw-r--r--include/linux/syscalls.h2
-rw-r--r--include/linux/thermal.h10
-rw-r--r--include/linux/topology.h2
-rw-r--r--include/linux/uaccess.h12
-rw-r--r--include/linux/unicode.h3
-rw-r--r--include/linux/usb/typec_altmode.h16
-rw-r--r--include/linux/vmalloc.h7
-rw-r--r--include/linux/vmstat.h2
-rw-r--r--include/linux/xarray.h56
-rw-r--r--include/misc/ocxl.h10
-rw-r--r--include/net/cfg80211.h1
-rw-r--r--include/ras/ras_event.h3
-rw-r--r--include/rdma/ib_cache.h3
-rw-r--r--include/rdma/ib_cm.h3
-rw-r--r--include/rdma/ib_umem.h46
-rw-r--r--include/rdma/ib_umem_odp.h21
-rw-r--r--include/rdma/ib_verbs.h212
-rw-r--r--include/rdma/rdma_cm.h46
-rw-r--r--include/rdma/restrack.h21
-rw-r--r--include/trace/events/afs.h109
-rw-r--r--include/trace/events/f2fs.h10
-rw-r--r--include/trace/events/rcu.h54
-rw-r--r--include/trace/events/rdma.h41
-rw-r--r--include/trace/events/rpcrdma.h64
-rw-r--r--include/trace/events/sunrpc.h286
-rw-r--r--include/uapi/asm-generic/hugetlb_encode.h1
-rw-r--r--include/uapi/asm-generic/unistd.h4
-rw-r--r--include/uapi/linux/fuse.h50
-rw-r--r--include/uapi/linux/mman.h1
-rw-r--r--include/uapi/linux/nfs4.h3
-rw-r--r--include/uapi/linux/virtio_fs.h3
-rw-r--r--include/uapi/rdma/efa-abi.h1
-rw-r--r--include/uapi/rdma/hns-abi.h4
-rw-r--r--include/uapi/rdma/ib_user_ioctl_cmds.h16
-rw-r--r--include/uapi/rdma/ib_user_ioctl_verbs.h15
-rw-r--r--include/uapi/rdma/ib_user_verbs.h11
-rw-r--r--include/uapi/rdma/rdma_user_rxe.h12
-rw-r--r--include/xen/events.h21
-rw-r--r--init/main.c4
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/acct.c10
-rw-r--r--kernel/cgroup/cpuset.c2
-rw-r--r--kernel/debug/debug_core.c48
-rw-r--r--kernel/debug/gdbstub.c5
-rw-r--r--kernel/debug/kdb/kdb_bp.c9
-rw-r--r--kernel/debug/kdb/kdb_bt.c4
-rw-r--r--kernel/debug/kdb/kdb_debugger.c2
-rw-r--r--kernel/debug/kdb/kdb_io.c22
-rw-r--r--kernel/debug/kdb/kdb_main.c8
-rw-r--r--kernel/debug/kdb/kdb_private.h4
-rw-r--r--kernel/dma/direct.c2
-rw-r--r--kernel/entry/common.c2
-rw-r--r--kernel/exit.c19
-rw-r--r--kernel/fork.c4
-rw-r--r--kernel/futex.c2
-rw-r--r--kernel/irq/timings.c2
-rw-r--r--kernel/jump_label.c2
-rw-r--r--kernel/kcsan/encoding.h2
-rw-r--r--kernel/kexec_core.c2
-rw-r--r--kernel/kexec_file.c2
-rw-r--r--kernel/kthread.c2
-rw-r--r--kernel/livepatch/state.c2
-rw-r--r--kernel/locking/locktorture.c2
-rw-r--r--kernel/panic.c12
-rw-r--r--kernel/pid.c19
-rw-r--r--kernel/pid_namespace.c2
-rw-r--r--kernel/power/snapshot.c2
-rw-r--r--kernel/printk/printk_ringbuffer.c5
-rw-r--r--kernel/range.c3
-rw-r--r--kernel/rcu/Kconfig8
-rw-r--r--kernel/rcu/Kconfig.debug17
-rw-r--r--kernel/rcu/Makefile2
-rw-r--r--kernel/rcu/rcu_segcblist.c10
-rw-r--r--kernel/rcu/rcuscale.c (renamed from kernel/rcu/rcuperf.c)330
-rw-r--r--kernel/rcu/rcutorture.c61
-rw-r--r--kernel/rcu/refscale.c8
-rw-r--r--kernel/rcu/srcutree.c13
-rw-r--r--kernel/rcu/tree.c179
-rw-r--r--kernel/rcu/tree.h2
-rw-r--r--kernel/rcu/tree_exp.h6
-rw-r--r--kernel/rcu/tree_plugin.h40
-rw-r--r--kernel/rcu/tree_stall.h8
-rw-r--r--kernel/rcu/update.c13
-rw-r--r--kernel/relay.c2
-rw-r--r--kernel/resource.c110
-rw-r--r--kernel/scftorture.c575
-rw-r--r--kernel/smp.c136
-rw-r--r--kernel/sys.c2
-rw-r--r--kernel/sys_ni.c1
-rw-r--r--kernel/time/tick-sched.c2
-rw-r--r--kernel/trace/trace_events.c2
-rw-r--r--kernel/user_namespace.c2
-rw-r--r--kernel/workqueue.c3
-rw-r--r--lib/Kconfig.debug51
-rw-r--r--lib/Kconfig.kgdb15
-rw-r--r--lib/Kconfig.ubsan14
-rw-r--r--lib/Makefile3
-rw-r--r--lib/bitfield_kunit.c (renamed from lib/test_bitfield.c)90
-rw-r--r--lib/bitmap.c2
-rw-r--r--lib/crc32.c2
-rw-r--r--lib/decompress_bunzip2.c2
-rw-r--r--lib/decompress_unzstd.c7
-rw-r--r--lib/dynamic_queue_limits.c4
-rw-r--r--lib/earlycpio.c2
-rw-r--r--lib/fault-inject-usercopy.c39
-rw-r--r--lib/find_bit.c1
-rw-r--r--lib/hexdump.c1
-rw-r--r--lib/idr.c10
-rw-r--r--lib/iov_iter.c5
-rw-r--r--lib/kunit/Makefile3
-rw-r--r--lib/kunit/executor.c43
-rw-r--r--lib/kunit/test.c13
-rw-r--r--lib/libcrc32c.c2
-rw-r--r--lib/math/rational.c2
-rw-r--r--lib/math/reciprocal_div.c1
-rw-r--r--lib/mpi/mpi-bit.c2
-rw-r--r--lib/nmi_backtrace.c6
-rw-r--r--lib/percpu_counter.c2
-rw-r--r--lib/radix-tree.c3
-rw-r--r--lib/scatterlist.c135
-rw-r--r--lib/strncpy_from_user.c3
-rw-r--r--lib/syscall.c2
-rw-r--r--lib/test_hmm.c2
-rw-r--r--lib/test_sysctl.c2
-rw-r--r--lib/test_xarray.c97
-rw-r--r--lib/usercopy.c5
-rw-r--r--lib/xarray.c233
-rw-r--r--mm/Kconfig5
-rw-r--r--mm/compaction.c6
-rw-r--r--mm/debug_vm_pgtable.c207
-rw-r--r--mm/filemap.c66
-rw-r--r--mm/gup.c61
-rw-r--r--mm/gup_benchmark.c15
-rw-r--r--mm/highmem.c4
-rw-r--r--mm/huge_memory.c45
-rw-r--r--mm/hwpoison-inject.c18
-rw-r--r--mm/internal.h27
-rw-r--r--mm/khugepaged.c2
-rw-r--r--mm/ksm.c2
-rw-r--r--mm/madvise.c177
-rw-r--r--mm/memblock.c8
-rw-r--r--mm/memcontrol.c75
-rw-r--r--mm/memory-failure.c329
-rw-r--r--mm/memory.c23
-rw-r--r--mm/memory_hotplug.c257
-rw-r--r--mm/memremap.c3
-rw-r--r--mm/migrate.c82
-rw-r--r--mm/mmap.c81
-rw-r--r--mm/mmu_notifier.c2
-rw-r--r--mm/nommu.c7
-rw-r--r--mm/page-writeback.c1
-rw-r--r--mm/page_alloc.c241
-rw-r--r--mm/page_isolation.c16
-rw-r--r--mm/page_owner.c10
-rw-r--r--mm/page_poison.c20
-rw-r--r--mm/page_reporting.c4
-rw-r--r--mm/percpu.c3
-rw-r--r--mm/readahead.c148
-rw-r--r--mm/rmap.c10
-rw-r--r--mm/shmem.c2
-rw-r--r--mm/shuffle.c2
-rw-r--r--mm/slab.c2
-rw-r--r--mm/slab.h4
-rw-r--r--mm/slub.c2
-rw-r--r--mm/sparse.c2
-rw-r--r--mm/swap_state.c2
-rw-r--r--mm/truncate.c6
-rw-r--r--mm/util.c3
-rw-r--r--mm/vmalloc.c147
-rw-r--r--mm/vmscan.c5
-rw-r--r--mm/vmstat.c8
-rw-r--r--mm/workingset.c15
-rw-r--r--mm/zsmalloc.c10
-rw-r--r--net/sunrpc/backchannel_rqst.c2
-rw-r--r--net/sunrpc/clnt.c78
-rw-r--r--net/sunrpc/rpcb_clnt.c129
-rw-r--r--net/sunrpc/sched.c52
-rw-r--r--net/sunrpc/sunrpc.h2
-rw-r--r--net/sunrpc/xdr.c305
-rw-r--r--net/sunrpc/xprt.c24
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c2
-rw-r--r--net/sunrpc/xprtrdma/transport.c7
-rw-r--r--net/sunrpc/xprtsock.c5
-rw-r--r--scripts/Makefile.ubsan10
-rwxr-xr-xscripts/checkpatch.pl238
-rwxr-xr-xscripts/coccicheck17
-rw-r--r--scripts/coccinelle/api/alloc/zalloc-simple.cocci16
-rw-r--r--scripts/coccinelle/api/kfree_mismatch.cocci228
-rw-r--r--scripts/coccinelle/api/kfree_sensitive.cocci (renamed from scripts/coccinelle/api/kzfree.cocci)35
-rw-r--r--scripts/coccinelle/api/kobj_to_dev.cocci45
-rw-r--r--scripts/coccinelle/api/kvmalloc.cocci256
-rw-r--r--scripts/coccinelle/free/ifnullfree.cocci11
-rw-r--r--scripts/coccinelle/iterators/for_each_child.cocci358
-rw-r--r--scripts/coccinelle/misc/excluded_middle.cocci39
-rw-r--r--scripts/coccinelle/misc/flexible_array.cocci88
-rw-r--r--scripts/coccinelle/misc/uninitialized_var.cocci51
-rw-r--r--scripts/const_structs.checkpatch3
-rw-r--r--scripts/gdb/linux/proc.py15
-rw-r--r--scripts/gdb/linux/tasks.py9
-rwxr-xr-xscripts/get_maintainer.pl9
-rwxr-xr-xscripts/kernel-doc305
-rw-r--r--tools/build/Makefile.feature7
-rw-r--r--tools/build/feature/Makefile6
-rw-r--r--tools/build/feature/test-all.c15
-rw-r--r--tools/build/feature/test-libbfd-buildid.c8
-rw-r--r--tools/lib/perf/evlist.c3
-rw-r--r--tools/lib/perf/include/perf/event.h16
-rw-r--r--tools/lib/traceevent/event-parse-api.c8
-rw-r--r--tools/lib/traceevent/event-parse-local.h24
-rw-r--r--tools/lib/traceevent/event-parse.c125
-rw-r--r--tools/lib/traceevent/event-parse.h8
-rw-r--r--tools/lib/traceevent/event-plugin.c2
-rw-r--r--tools/lib/traceevent/parse-filter.c23
-rw-r--r--tools/perf/Documentation/perf-c2c.txt34
-rw-r--r--tools/perf/Documentation/perf-config.txt5
-rw-r--r--tools/perf/Documentation/perf-diff.txt4
-rw-r--r--tools/perf/Documentation/perf-ftrace.txt3
-rw-r--r--tools/perf/Documentation/perf-inject.txt6
-rw-r--r--tools/perf/Documentation/perf-intel-pt.txt53
-rw-r--r--tools/perf/Documentation/perf-list.txt1
-rw-r--r--tools/perf/Documentation/perf-record.txt56
-rw-r--r--tools/perf/Documentation/perf-stat.txt60
-rw-r--r--tools/perf/Documentation/perf.txt69
-rw-r--r--tools/perf/Documentation/topdown.txt256
-rw-r--r--tools/perf/Makefile.config25
-rw-r--r--tools/perf/Makefile.perf24
-rw-r--r--tools/perf/arch/arm64/util/Build1
-rw-r--r--tools/perf/arch/arm64/util/tsc.c21
-rw-r--r--tools/perf/arch/powerpc/util/header.c7
-rw-r--r--tools/perf/arch/x86/annotate/instructions.c1
-rw-r--r--tools/perf/arch/x86/util/Build2
-rw-r--r--tools/perf/arch/x86/util/group.c28
-rw-r--r--tools/perf/arch/x86/util/topdown.c63
-rw-r--r--tools/perf/arch/x86/util/tsc.c73
-rw-r--r--tools/perf/bench/Build1
-rw-r--r--tools/perf/bench/bench.h1
-rw-r--r--tools/perf/bench/inject-buildid.c476
-rw-r--r--tools/perf/bench/numa.c67
-rw-r--r--tools/perf/builtin-bench.c1
-rw-r--r--tools/perf/builtin-buildid-cache.c25
-rw-r--r--tools/perf/builtin-c2c.c85
-rw-r--r--tools/perf/builtin-diff.c119
-rw-r--r--tools/perf/builtin-ftrace.c84
-rw-r--r--tools/perf/builtin-inject.c203
-rw-r--r--tools/perf/builtin-list.c7
-rw-r--r--tools/perf/builtin-record.c68
-rw-r--r--tools/perf/builtin-sched.c24
-rw-r--r--tools/perf/builtin-script.c10
-rw-r--r--tools/perf/builtin-stat.c136
-rw-r--r--tools/perf/builtin-trace.c6
-rw-r--r--tools/perf/builtin-version.c1
-rwxr-xr-xtools/perf/check-headers.sh22
-rw-r--r--tools/perf/perf-sys.h22
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power8/cache.json10
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power8/frontend.json12
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power8/marked.json10
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power8/other.json16
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power8/translation.json2
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json35
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/branch.json5
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/cache.json41
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/data-fabric.json98
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/recommended.json178
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/cache.json23
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/data-fabric.json98
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/recommended.json178
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/cache.json28
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json153
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json34
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/memory.json704
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/other.json1100
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json10
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json12
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/uncore-other.json21
-rw-r--r--tools/perf/pmu-events/arch/x86/mapfile.csv1
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/cache.json2342
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/floating-point.json96
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/frontend.json656
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/memory.json1973
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/other.json172
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/pipeline.json1200
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json141
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json26
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json730
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json358
-rw-r--r--tools/perf/pmu-events/jevents.c237
-rw-r--r--tools/perf/pmu-events/jevents.h23
-rw-r--r--tools/perf/pmu-events/pmu-events.h6
-rw-r--r--tools/perf/scripts/python/futex-contention.py51
-rw-r--r--tools/perf/tests/Build2
-rw-r--r--tools/perf/tests/attr.c2
-rw-r--r--tools/perf/tests/builtin-test.c8
-rw-r--r--tools/perf/tests/expand-cgroup.c241
-rw-r--r--tools/perf/tests/make2
-rw-r--r--tools/perf/tests/parse-events.c58
-rw-r--r--tools/perf/tests/parse-metric.c4
-rw-r--r--tools/perf/tests/pe-file-parsing.c98
-rw-r--r--tools/perf/tests/pe-file.c14
-rw-r--r--tools/perf/tests/pe-file.exebin0 -> 75595 bytes
-rw-r--r--tools/perf/tests/pe-file.exe.debugbin0 -> 141644 bytes
-rw-r--r--tools/perf/tests/python-use.c1
-rw-r--r--tools/perf/tests/sdt.c6
-rwxr-xr-xtools/perf/tests/shell/buildid.sh101
-rwxr-xr-xtools/perf/tests/shell/test_arm_coresight.sh183
-rw-r--r--tools/perf/tests/tests.h2
-rw-r--r--tools/perf/trace/beauty/mmap.c74
-rwxr-xr-xtools/perf/trace/beauty/mmap_flags.sh16
-rwxr-xr-xtools/perf/trace/beauty/mmap_prot.sh30
-rwxr-xr-xtools/perf/trace/beauty/mremap_flags.sh18
-rw-r--r--tools/perf/util/Build2
-rw-r--r--tools/perf/util/annotate.c5
-rw-r--r--tools/perf/util/build-id.c48
-rw-r--r--tools/perf/util/build-id.h12
-rw-r--r--tools/perf/util/callchain.c99
-rw-r--r--tools/perf/util/callchain.h9
-rw-r--r--tools/perf/util/cgroup.c115
-rw-r--r--tools/perf/util/cgroup.h3
-rw-r--r--tools/perf/util/config.c2
-rw-r--r--tools/perf/util/config.h2
-rw-r--r--tools/perf/util/dso.c23
-rw-r--r--tools/perf/util/dso.h7
-rw-r--r--tools/perf/util/dsos.c9
-rw-r--r--tools/perf/util/event.c2
-rw-r--r--tools/perf/util/evlist.c123
-rw-r--r--tools/perf/util/evlist.h8
-rw-r--r--tools/perf/util/evsel.c109
-rw-r--r--tools/perf/util/evsel.h93
-rw-r--r--tools/perf/util/group.h8
-rw-r--r--tools/perf/util/header.c15
-rw-r--r--tools/perf/util/intel-pt.c8
-rw-r--r--tools/perf/util/jitdump.c14
-rw-r--r--tools/perf/util/machine.c12
-rw-r--r--tools/perf/util/machine.h4
-rw-r--r--tools/perf/util/map.c21
-rw-r--r--tools/perf/util/map.h14
-rw-r--r--tools/perf/util/metricgroup.c177
-rw-r--r--tools/perf/util/metricgroup.h9
-rw-r--r--tools/perf/util/parse-events.c23
-rw-r--r--tools/perf/util/parse-events.h2
-rw-r--r--tools/perf/util/parse-events.l2
-rw-r--r--tools/perf/util/parse-events.y8
-rw-r--r--tools/perf/util/print_binary.c2
-rw-r--r--tools/perf/util/probe-event.c69
-rw-r--r--tools/perf/util/probe-finder.c63
-rw-r--r--tools/perf/util/probe-finder.h7
-rw-r--r--tools/perf/util/python.c21
-rw-r--r--tools/perf/util/record.c3
-rw-r--r--tools/perf/util/record.h1
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c2
-rw-r--r--tools/perf/util/stat-display.c1
-rw-r--r--tools/perf/util/stat-shadow.c89
-rw-r--r--tools/perf/util/stat.c4
-rw-r--r--tools/perf/util/stat.h10
-rw-r--r--tools/perf/util/stream.c342
-rw-r--r--tools/perf/util/stream.h41
-rw-r--r--tools/perf/util/symbol-elf.c107
-rw-r--r--tools/perf/util/symbol-minimal.c31
-rw-r--r--tools/perf/util/symbol.c155
-rw-r--r--tools/perf/util/symbol.h9
-rw-r--r--tools/perf/util/synthetic-events.c10
-rw-r--r--tools/perf/util/topdown.c58
-rw-r--r--tools/perf/util/topdown.h12
-rw-r--r--tools/perf/util/tsc.c81
-rw-r--r--tools/perf/util/tsc.h5
-rw-r--r--tools/perf/util/util.h6
-rw-r--r--tools/testing/kunit/kunit_parser.py76
-rw-r--r--tools/testing/kunit/test_data/test_is_test_passed-all_passed.log1
-rw-r--r--tools/testing/kunit/test_data/test_is_test_passed-crash.log1
-rw-r--r--tools/testing/kunit/test_data/test_is_test_passed-failure.log1
-rw-r--r--tools/testing/radix-tree/idr-test.c29
-rw-r--r--tools/testing/radix-tree/linux/kernel.h1
-rw-r--r--tools/testing/radix-tree/linux/local_lock.h8
-rw-r--r--tools/testing/radix-tree/test.h4
-rw-r--r--tools/testing/scatterlist/Makefile3
-rw-r--r--tools/testing/scatterlist/linux/mm.h35
-rw-r--r--tools/testing/scatterlist/main.c53
-rw-r--r--tools/testing/selftests/exec/.gitignore1
-rw-r--r--tools/testing/selftests/exec/Makefile9
-rw-r--r--tools/testing/selftests/exec/load_address.c68
-rw-r--r--tools/testing/selftests/powerpc/alignment/alignment_handler.c12
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/context_switch.c6
-rw-r--r--tools/testing/selftests/powerpc/dscr/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_default_test.c2
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c2
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c2
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c2
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c2
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c2
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_user_test.c2
-rwxr-xr-xtools/testing/selftests/powerpc/eeh/eeh-basic.sh9
-rw-r--r--tools/testing/selftests/powerpc/include/utils.h2
-rw-r--r--tools/testing/selftests/powerpc/mm/bad_accesses.c1
-rw-r--r--tools/testing/selftests/powerpc/pmu/count_stcx_fail.c1
-rw-r--r--tools/testing/selftests/powerpc/pmu/l3_bank_test.c3
-rw-r--r--tools/testing/selftests/powerpc/pmu/per_event_excludes.c2
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c48
-rw-r--r--tools/testing/selftests/powerpc/security/rfi_flush.c38
-rw-r--r--tools/testing/selftests/powerpc/security/spectre_v2.c3
-rw-r--r--tools/testing/selftests/powerpc/stringloops/memcmp.c2
-rw-r--r--tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S23
-rw-r--r--tools/testing/selftests/powerpc/syscalls/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/syscalls/rtas_filter.c285
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-poison.c11
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-tmspr.c10
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-trap.c10
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-unavailable.c9
-rw-r--r--tools/testing/selftests/powerpc/tm/tm.h3
-rw-r--r--tools/testing/selftests/powerpc/utils.c39
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale-ftrace.sh (renamed from tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh)6
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh (renamed from tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh)14
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh38
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh33
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh36
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-console.sh11
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE051
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/CFLIST (renamed from tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST)0
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/TINY (renamed from tools/testing/selftests/rcutorture/configs/rcuperf/TINY)0
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/TREE (renamed from tools/testing/selftests/rcutorture/configs/rcuperf/TREE)0
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 (renamed from tools/testing/selftests/rcutorture/configs/rcuperf/TREE54)0
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh (renamed from tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh)4
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/CFLIST2
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/CFcommon2
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT9
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/PREEMPT9
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh30
-rw-r--r--tools/testing/selftests/rcutorture/doc/initrd.txt36
-rw-r--r--tools/testing/selftests/rcutorture/doc/rcu-test-image.txt41
-rw-r--r--tools/testing/selftests/vm/hmm-tests.c2
2024 files changed, 50881 insertions, 31710 deletions
diff --git a/.clang-format b/.clang-format
index 6cbd6ee51610..10dc5a9a61b3 100644
--- a/.clang-format
+++ b/.clang-format
@@ -429,6 +429,7 @@ ForEachMacros:
- 'rbtree_postorder_for_each_entry_safe'
- 'rdma_for_each_block'
- 'rdma_for_each_port'
+ - 'rdma_umem_for_each_dma_block'
- 'resource_list_for_each_entry'
- 'resource_list_for_each_entry_safe'
- 'rhl_for_each_entry_rcu'
diff --git a/.mailmap b/.mailmap
index e4ccac4e2f88..1e14566a3d56 100644
--- a/.mailmap
+++ b/.mailmap
@@ -133,6 +133,7 @@ James Ketrenos <jketreno@io.(none)>
Jan Glauber <jan.glauber@gmail.com> <jang@de.ibm.com>
Jan Glauber <jan.glauber@gmail.com> <jang@linux.vnet.ibm.com>
Jan Glauber <jan.glauber@gmail.com> <jglauber@cavium.com>
+Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@linux.intel.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgg@nvidia.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com>
diff --git a/Documentation/ABI/stable/sysfs-class-infiniband b/Documentation/ABI/stable/sysfs-class-infiniband
index 96dfe1926b76..87b11f91b425 100644
--- a/Documentation/ABI/stable/sysfs-class-infiniband
+++ b/Documentation/ABI/stable/sysfs-class-infiniband
@@ -258,23 +258,6 @@ Description:
userspace ABI compatibility of umad & issm devices.
-What: /sys/class/infiniband_cm/ucmN/ibdev
-Date: Oct, 2005
-KernelVersion: v2.6.14
-Contact: linux-rdma@vger.kernel.org
-Description:
- (RO) Display Infiniband (IB) device name
-
-
-What: /sys/class/infiniband_cm/abi_version
-Date: Oct, 2005
-KernelVersion: v2.6.14
-Contact: linux-rdma@vger.kernel.org
-Description:
- (RO) Value is incremented if any changes are made that break
- userspace ABI compatibility of ucm devices.
-
-
What: /sys/class/infiniband_verbs/uverbsN/ibdev
What: /sys/class/infiniband_verbs/uverbsN/abi_version
Date: Sept, 2005
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7
index e82fc37be802..2273627df190 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7
@@ -1,3 +1,28 @@
+What: /sys/bus/event_source/devices/hv_24x7/format
+Date: September 2020
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description: Read-only. Attribute group to describe the magic bits
+ that go into perf_event_attr.config for a particular pmu.
+ (See ABI/testing/sysfs-bus-event_source-devices-format).
+
+ Each attribute under this group defines a bit range of the
+ perf_event_attr.config. All supported attributes are listed
+ below.
+
+ chip = "config:16-31"
+ core = "config:16-31"
+ domain = "config:0-3"
+ lpar = "config:0-15"
+ offset = "config:32-63"
+ vcpu = "config:16-31"
+
+ For example,
+
+ PM_PB_CYC = "domain=1,offset=0x80,chip=?,lpar=0x0"
+
+ In this event, '?' after chip specifies that
+ this value will be provided by user while running this event.
+
What: /sys/bus/event_source/devices/hv_24x7/interface/catalog
Date: February 2014
Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
index 3ca4e554d2f9..6a023b42486c 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
@@ -1,3 +1,34 @@
+What: /sys/bus/event_source/devices/hv_gpci/format
+Date: September 2020
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description: Read-only. Attribute group to describe the magic bits
+ that go into perf_event_attr.config for a particular pmu.
+ (See ABI/testing/sysfs-bus-event_source-devices-format).
+
+ Each attribute under this group defines a bit range of the
+ perf_event_attr.config. All supported attributes are listed
+ below.
+
+ counter_info_version = "config:16-23"
+ length = "config:24-31"
+ partition_id = "config:32-63"
+ request = "config:0-31"
+ sibling_part_id = "config:32-63"
+ hw_chip_id = "config:32-63"
+ offset = "config:32-63"
+ phys_processor_idx = "config:32-63"
+ secondary_index = "config:0-15"
+ starting_index = "config:32-63"
+
+ For example,
+
+ processor_core_utilization_instructions_completed = "request=0x94,
+ phys_processor_idx=?,counter_info_version=0x8,
+ length=8,offset=0x18"
+
+ In this event, '?' after phys_processor_idx specifies this value
+ this value will be provided by user while running this event.
+
What: /sys/bus/event_source/devices/hv_gpci/interface/collect_privileged
Date: February 2014
Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
@@ -41,3 +72,10 @@ Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
Description:
A number indicating the latest version of the gpci interface
that the kernel is aware of.
+
+What: /sys/devices/hv_gpci/cpumask
+Date: October 2020
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description: read only
+ This sysfs file exposes the cpumask which is designated to make
+ HCALLs to retrieve hv-gpci pmu event counter data.
diff --git a/Documentation/ABI/testing/sysfs-class-power b/Documentation/ABI/testing/sysfs-class-power
index 40213c73bc9c..dbccb2fcd7ce 100644
--- a/Documentation/ABI/testing/sysfs-class-power
+++ b/Documentation/ABI/testing/sysfs-class-power
@@ -34,7 +34,7 @@ Description:
Describes the main type of the supply.
Access: Read
- Valid values: "Battery", "UPS", "Mains", "USB"
+ Valid values: "Battery", "UPS", "Mains", "USB", "Wireless"
===== Battery Properties =====
@@ -108,7 +108,8 @@ Description:
which they average readings to smooth out the reported value.
Access: Read
- Valid values: Represented in microamps
+ Valid values: Represented in microamps. Negative values are used
+ for discharging batteries, positive values for charging batteries.
What: /sys/class/power_supply/<supply_name>/current_max
Date: October 2010
@@ -127,7 +128,8 @@ Description:
This value is not averaged/smoothed.
Access: Read
- Valid values: Represented in microamps
+ Valid values: Represented in microamps. Negative values are used
+ for discharging batteries, positive values for charging batteries.
What: /sys/class/power_supply/<supply_name>/charge_control_limit
Date: Oct 2012
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 7f730c4c8df2..834d0becae6d 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -22,7 +22,8 @@ Contact: "Namjae Jeon" <namjae.jeon@samsung.com>
Description: Controls the victim selection policy for garbage collection.
Setting gc_idle = 0(default) will disable this option. Setting
gc_idle = 1 will select the Cost Benefit approach & setting
- gc_idle = 2 will select the greedy approach.
+ gc_idle = 2 will select the greedy approach & setting
+ gc_idle = 3 will select the age-threshold based approach.
What: /sys/fs/f2fs/<disk>/reclaim_segments
Date: October 2013
diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.rst b/Documentation/RCU/Design/Data-Structures/Data-Structures.rst
index 4a48e20a46f2..f4efd6897b09 100644
--- a/Documentation/RCU/Design/Data-Structures/Data-Structures.rst
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.rst
@@ -963,7 +963,7 @@ exit and perhaps also vice versa. Therefore, whenever the
``->dynticks_nesting`` field is incremented up from zero, the
``->dynticks_nmi_nesting`` field is set to a large positive number, and
whenever the ``->dynticks_nesting`` field is decremented down to zero,
-the the ``->dynticks_nmi_nesting`` field is set to zero. Assuming that
+the ``->dynticks_nmi_nesting`` field is set to zero. Assuming that
the number of misnested interrupts is not sufficient to overflow the
counter, this approach corrects the ``->dynticks_nmi_nesting`` field
every time the corresponding CPU enters the idle loop from process
diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst
index 8f41ad0aa753..1ae79a10a8de 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.rst
+++ b/Documentation/RCU/Design/Requirements/Requirements.rst
@@ -2162,7 +2162,7 @@ scheduling-clock interrupt be enabled when RCU needs it to be:
this sort of thing.
#. If a CPU is in a portion of the kernel that is absolutely positively
no-joking guaranteed to never execute any RCU read-side critical
- sections, and RCU believes this CPU to to be idle, no problem. This
+ sections, and RCU believes this CPU to be idle, no problem. This
sort of thing is used by some architectures for light-weight
exception handlers, which can then avoid the overhead of
``rcu_irq_enter()`` and ``rcu_irq_exit()`` at exception entry and
@@ -2431,7 +2431,7 @@ However, there are legitimate preemptible-RCU implementations that do
not have this property, given that any point in the code outside of an
RCU read-side critical section can be a quiescent state. Therefore,
*RCU-sched* was created, which follows “classic” RCU in that an
-RCU-sched grace period waits for for pre-existing interrupt and NMI
+RCU-sched grace period waits for pre-existing interrupt and NMI
handlers. In kernels built with ``CONFIG_PREEMPT=n``, the RCU and
RCU-sched APIs have identical implementations, while kernels built with
``CONFIG_PREEMPT=y`` provide a separate implementation for each.
diff --git a/Documentation/RCU/whatisRCU.rst b/Documentation/RCU/whatisRCU.rst
index c7f147b8034f..fb3ff76c3e73 100644
--- a/Documentation/RCU/whatisRCU.rst
+++ b/Documentation/RCU/whatisRCU.rst
@@ -360,7 +360,7 @@ order to amortize their overhead over many uses of the corresponding APIs.
There are at least three flavors of RCU usage in the Linux kernel. The diagram
above shows the most common one. On the updater side, the rcu_assign_pointer(),
-sychronize_rcu() and call_rcu() primitives used are the same for all three
+synchronize_rcu() and call_rcu() primitives used are the same for all three
flavors. However for protection (on the reader side), the primitives used vary
depending on the flavor:
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 0f1fb7e86237..02d4adbf98d2 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1343,6 +1343,7 @@
current integrity status.
failslab=
+ fail_usercopy=
fail_page_alloc=
fail_make_request=[KNL]
General fault injection mechanism.
@@ -3094,6 +3095,10 @@
and gids from such clients. This is intended to ease
migration from NFSv2/v3.
+ nmi_backtrace.backtrace_idle [KNL]
+ Dump stacks even of idle CPUs in response to an
+ NMI stack-backtrace request.
+
nmi_debug= [KNL,SH] Specify one or more actions to take
when a NMI is triggered.
Format: [state][,regs][,debounce][,die]
@@ -4173,46 +4178,55 @@
This wake_up() will be accompanied by a
WARN_ONCE() splat and an ftrace_dump().
+ rcutree.rcu_unlock_delay= [KNL]
+ In CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels,
+ this specifies an rcu_read_unlock()-time delay
+ in microseconds. This defaults to zero.
+ Larger delays increase the probability of
+ catching RCU pointer leaks, that is, buggy use
+ of RCU-protected pointers after the relevant
+ rcu_read_unlock() has completed.
+
rcutree.sysrq_rcu= [KNL]
Commandeer a sysrq key to dump out Tree RCU's
rcu_node tree with an eye towards determining
why a new grace period has not yet started.
- rcuperf.gp_async= [KNL]
+ rcuscale.gp_async= [KNL]
Measure performance of asynchronous
grace-period primitives such as call_rcu().
- rcuperf.gp_async_max= [KNL]
+ rcuscale.gp_async_max= [KNL]
Specify the maximum number of outstanding
callbacks per writer thread. When a writer
thread exceeds this limit, it invokes the
corresponding flavor of rcu_barrier() to allow
previously posted callbacks to drain.
- rcuperf.gp_exp= [KNL]
+ rcuscale.gp_exp= [KNL]
Measure performance of expedited synchronous
grace-period primitives.
- rcuperf.holdoff= [KNL]
+ rcuscale.holdoff= [KNL]
Set test-start holdoff period. The purpose of
this parameter is to delay the start of the
test until boot completes in order to avoid
interference.
- rcuperf.kfree_rcu_test= [KNL]
+ rcuscale.kfree_rcu_test= [KNL]
Set to measure performance of kfree_rcu() flooding.
- rcuperf.kfree_nthreads= [KNL]
+ rcuscale.kfree_nthreads= [KNL]
The number of threads running loops of kfree_rcu().
- rcuperf.kfree_alloc_num= [KNL]
+ rcuscale.kfree_alloc_num= [KNL]
Number of allocations and frees done in an iteration.
- rcuperf.kfree_loops= [KNL]
- Number of loops doing rcuperf.kfree_alloc_num number
+ rcuscale.kfree_loops= [KNL]
+ Number of loops doing rcuscale.kfree_alloc_num number
of allocations and frees.
- rcuperf.nreaders= [KNL]
+ rcuscale.nreaders= [KNL]
Set number of RCU readers. The value -1 selects
N, where N is the number of CPUs. A value
"n" less than -1 selects N-n+1, where N is again
@@ -4221,23 +4235,23 @@
A value of "n" less than or equal to -N selects
a single reader.
- rcuperf.nwriters= [KNL]
+ rcuscale.nwriters= [KNL]
Set number of RCU writers. The values operate
- the same as for rcuperf.nreaders.
+ the same as for rcuscale.nreaders.
N, where N is the number of CPUs
- rcuperf.perf_type= [KNL]
+ rcuscale.perf_type= [KNL]
Specify the RCU implementation to test.
- rcuperf.shutdown= [KNL]
+ rcuscale.shutdown= [KNL]
Shut the system down after performance tests
complete. This is useful for hands-off automated
testing.
- rcuperf.verbose= [KNL]
+ rcuscale.verbose= [KNL]
Enable additional printk() statements.
- rcuperf.writer_holdoff= [KNL]
+ rcuscale.writer_holdoff= [KNL]
Write-side holdoff between grace periods,
in microseconds. The default of zero says
no holdoff.
@@ -4290,6 +4304,18 @@
are zero, rcutorture acts as if is interpreted
they are all non-zero.
+ rcutorture.irqreader= [KNL]
+ Run RCU readers from irq handlers, or, more
+ accurately, from a timer handler. Not all RCU
+ flavors take kindly to this sort of thing.
+
+ rcutorture.leakpointer= [KNL]
+ Leak an RCU-protected pointer out of the reader.
+ This can of course result in splats, and is
+ intended to test the ability of things like
+ CONFIG_RCU_STRICT_GRACE_PERIOD=y to detect
+ such leaks.
+
rcutorture.n_barrier_cbs= [KNL]
Set callbacks/threads for rcu_barrier() testing.
@@ -4511,8 +4537,8 @@
refscale.shutdown= [KNL]
Shut down the system at the end of the performance
test. This defaults to 1 (shut it down) when
- rcuperf is built into the kernel and to 0 (leave
- it running) when rcuperf is built as a module.
+ refscale is built into the kernel and to 0 (leave
+ it running) when refscale is built as a module.
refscale.verbose= [KNL]
Enable additional printk() statements.
@@ -4658,6 +4684,98 @@
Format: integer between 0 and 10
Default is 0.
+ scftorture.holdoff= [KNL]
+ Number of seconds to hold off before starting
+ test. Defaults to zero for module insertion and
+ to 10 seconds for built-in smp_call_function()
+ tests.
+
+ scftorture.longwait= [KNL]
+ Request ridiculously long waits randomly selected
+ up to the chosen limit in seconds. Zero (the
+ default) disables this feature. Please note
+ that requesting even small non-zero numbers of
+ seconds can result in RCU CPU stall warnings,
+ softlockup complaints, and so on.
+
+ scftorture.nthreads= [KNL]
+ Number of kthreads to spawn to invoke the
+ smp_call_function() family of functions.
+ The default of -1 specifies a number of kthreads
+ equal to the number of CPUs.
+
+ scftorture.onoff_holdoff= [KNL]
+ Number seconds to wait after the start of the
+ test before initiating CPU-hotplug operations.
+
+ scftorture.onoff_interval= [KNL]
+ Number seconds to wait between successive
+ CPU-hotplug operations. Specifying zero (which
+ is the default) disables CPU-hotplug operations.
+
+ scftorture.shutdown_secs= [KNL]
+ The number of seconds following the start of the
+ test after which to shut down the system. The
+ default of zero avoids shutting down the system.
+ Non-zero values are useful for automated tests.
+
+ scftorture.stat_interval= [KNL]
+ The number of seconds between outputting the
+ current test statistics to the console. A value
+ of zero disables statistics output.
+
+ scftorture.stutter_cpus= [KNL]
+ The number of jiffies to wait between each change
+ to the set of CPUs under test.
+
+ scftorture.use_cpus_read_lock= [KNL]
+ Use use_cpus_read_lock() instead of the default
+ preempt_disable() to disable CPU hotplug
+ while invoking one of the smp_call_function*()
+ functions.
+
+ scftorture.verbose= [KNL]
+ Enable additional printk() statements.
+
+ scftorture.weight_single= [KNL]
+ The probability weighting to use for the
+ smp_call_function_single() function with a zero
+ "wait" parameter. A value of -1 selects the
+ default if all other weights are -1. However,
+ if at least one weight has some other value, a
+ value of -1 will instead select a weight of zero.
+
+ scftorture.weight_single_wait= [KNL]
+ The probability weighting to use for the
+ smp_call_function_single() function with a
+ non-zero "wait" parameter. See weight_single.
+
+ scftorture.weight_many= [KNL]
+ The probability weighting to use for the
+ smp_call_function_many() function with a zero
+ "wait" parameter. See weight_single.
+ Note well that setting a high probability for
+ this weighting can place serious IPI load
+ on the system.
+
+ scftorture.weight_many_wait= [KNL]
+ The probability weighting to use for the
+ smp_call_function_many() function with a
+ non-zero "wait" parameter. See weight_single
+ and weight_many.
+
+ scftorture.weight_all= [KNL]
+ The probability weighting to use for the
+ smp_call_function_all() function with a zero
+ "wait" parameter. See weight_single and
+ weight_many.
+
+ scftorture.weight_all_wait= [KNL]
+ The probability weighting to use for the
+ smp_call_function_all() function with a
+ non-zero "wait" parameter. See weight_single
+ and weight_many.
+
skew_tick= [KNL] Offset the periodic timer tick per cpu to mitigate
xtime_lock contention on larger systems, and/or RCU lock
contention on all systems with CONFIG_MAXSMP set.
@@ -5852,6 +5970,14 @@
improve timer resolution at the expense of processing
more timer interrupts.
+ xen.event_eoi_delay= [XEN]
+ How long to delay EOI handling in case of event
+ storms (jiffies). Default is 10.
+
+ xen.event_loop_timeout= [XEN]
+ After which time (jiffies) the event handling loop
+ should start to delay EOI handling. Default is 2.
+
nopv= [X86,XEN,KVM,HYPER_V,VMWARE]
Disables the PV optimizations forcing the guest to run
as generic guest with no PV drivers. Currently support
diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst
index 368e612145d2..6adb7988e0eb 100644
--- a/Documentation/admin-guide/pm/cpufreq.rst
+++ b/Documentation/admin-guide/pm/cpufreq.rst
@@ -1,7 +1,6 @@
.. SPDX-License-Identifier: GPL-2.0
.. include:: <isonum.txt>
-.. |struct cpufreq_policy| replace:: :c:type:`struct cpufreq_policy <cpufreq_policy>`
.. |intel_pstate| replace:: :doc:`intel_pstate <intel_pstate>`
=======================
@@ -92,16 +91,16 @@ control the P-state of multiple CPUs at the same time and writing to it affects
all of those CPUs simultaneously.
Sets of CPUs sharing hardware P-state control interfaces are represented by
-``CPUFreq`` as |struct cpufreq_policy| objects. For consistency,
-|struct cpufreq_policy| is also used when there is only one CPU in the given
+``CPUFreq`` as struct cpufreq_policy objects. For consistency,
+struct cpufreq_policy is also used when there is only one CPU in the given
set.
-The ``CPUFreq`` core maintains a pointer to a |struct cpufreq_policy| object for
+The ``CPUFreq`` core maintains a pointer to a struct cpufreq_policy object for
every CPU in the system, including CPUs that are currently offline. If multiple
CPUs share the same hardware P-state control interface, all of the pointers
-corresponding to them point to the same |struct cpufreq_policy| object.
+corresponding to them point to the same struct cpufreq_policy object.
-``CPUFreq`` uses |struct cpufreq_policy| as its basic data type and the design
+``CPUFreq`` uses struct cpufreq_policy as its basic data type and the design
of its user space interface is based on the policy concept.
diff --git a/Documentation/admin-guide/pstore-blk.rst b/Documentation/admin-guide/pstore-blk.rst
index 296d5027787a..6898aba9fb5c 100644
--- a/Documentation/admin-guide/pstore-blk.rst
+++ b/Documentation/admin-guide/pstore-blk.rst
@@ -154,17 +154,11 @@ Configurations for driver
Only a block device driver cares about these configurations. A block device
driver uses ``register_pstore_blk`` to register to pstore/blk.
-.. kernel-doc:: fs/pstore/blk.c
- :identifiers: register_pstore_blk
-
A non-block device driver uses ``register_pstore_device`` with
``struct pstore_device_info`` to register to pstore/blk.
.. kernel-doc:: fs/pstore/blk.c
- :identifiers: register_pstore_device
-
-.. kernel-doc:: include/linux/pstore_blk.h
- :identifiers: pstore_device_info
+ :export:
Compression and header
----------------------
@@ -237,7 +231,7 @@ For developer reference, here are all the important structures and APIs:
:internal:
.. kernel-doc:: fs/pstore/blk.c
- :export:
+ :internal:
.. kernel-doc:: include/linux/pstore_blk.h
:internal:
diff --git a/Documentation/admin-guide/xfs.rst b/Documentation/admin-guide/xfs.rst
index f461d6c33534..86de8a1ad91c 100644
--- a/Documentation/admin-guide/xfs.rst
+++ b/Documentation/admin-guide/xfs.rst
@@ -210,6 +210,28 @@ When mounting an XFS filesystem, the following options are accepted.
inconsistent namespace presentation during or after a
failover event.
+Deprecation of V4 Format
+========================
+
+The V4 filesystem format lacks certain features that are supported by
+the V5 format, such as metadata checksumming, strengthened metadata
+verification, and the ability to store timestamps past the year 2038.
+Because of this, the V4 format is deprecated. All users should upgrade
+by backing up their files, reformatting, and restoring from the backup.
+
+Administrators and users can detect a V4 filesystem by running xfs_info
+against a filesystem mountpoint and checking for a string containing
+"crc=". If no such string is found, please upgrade xfsprogs to the
+latest version and try again.
+
+The deprecation will take place in two parts. Support for mounting V4
+filesystems can now be disabled at kernel build time via Kconfig option.
+The option will default to yes until September 2025, at which time it
+will be changed to default to no. In September 2030, support will be
+removed from the codebase entirely.
+
+Note: Distributors may choose to withdraw V4 format support earlier than
+the dates listed above.
Deprecated Mount Options
========================
@@ -217,6 +239,9 @@ Deprecated Mount Options
=========================== ================
Name Removal Schedule
=========================== ================
+Mounting with V4 filesystem September 2030
+ikeep/noikeep September 2025
+attr2/noattr2 September 2025
=========================== ================
@@ -331,7 +356,12 @@ The following sysctls are available for the XFS filesystem:
Deprecated Sysctls
==================
-None at present.
+=========================== ================
+ Name Removal Schedule
+=========================== ================
+fs.xfs.irix_sgid_inherit September 2025
+fs.xfs.irix_symlink_mode September 2025
+=========================== ================
Removed Sysctls
diff --git a/Documentation/block/blk-mq.rst b/Documentation/block/blk-mq.rst
index 88c56afcb070..a980d23af48c 100644
--- a/Documentation/block/blk-mq.rst
+++ b/Documentation/block/blk-mq.rst
@@ -63,10 +63,10 @@ Software staging queues
~~~~~~~~~~~~~~~~~~~~~~~
The block IO subsystem adds requests in the software staging queues
-(represented by struct :c:type:`blk_mq_ctx`) in case that they weren't sent
+(represented by struct blk_mq_ctx) in case that they weren't sent
directly to the driver. A request is one or more BIOs. They arrived at the
-block layer through the data structure struct :c:type:`bio`. The block layer
-will then build a new structure from it, the struct :c:type:`request` that will
+block layer through the data structure struct bio. The block layer
+will then build a new structure from it, the struct request that will
be used to communicate with the device driver. Each queue has its own lock and
the number of queues is defined by a per-CPU or per-node basis.
@@ -102,7 +102,7 @@ hardware queue will be drained in sequence according to their mapping.
Hardware dispatch queues
~~~~~~~~~~~~~~~~~~~~~~~~
-The hardware queue (represented by struct :c:type:`blk_mq_hw_ctx`) is a struct
+The hardware queue (represented by struct blk_mq_hw_ctx) is a struct
used by device drivers to map the device submission queues (or device DMA ring
buffer), and are the last step of the block layer submission code before the
low level device driver taking ownership of the request. To run this queue, the
@@ -110,9 +110,9 @@ block layer removes requests from the associated software queues and tries to
dispatch to the hardware.
If it's not possible to send the requests directly to hardware, they will be
-added to a linked list (:c:type:`hctx->dispatch`) of requests. Then,
+added to a linked list (``hctx->dispatch``) of requests. Then,
next time the block layer runs a queue, it will send the requests laying at the
-:c:type:`dispatch` list first, to ensure a fairness dispatch with those
+``dispatch`` list first, to ensure a fairness dispatch with those
requests that were ready to be sent first. The number of hardware queues
depends on the number of hardware contexts supported by the hardware and its
device driver, but it will not be more than the number of cores of the system.
diff --git a/Documentation/block/inline-encryption.rst b/Documentation/block/inline-encryption.rst
index 354817b80887..e75151e467d3 100644
--- a/Documentation/block/inline-encryption.rst
+++ b/Documentation/block/inline-encryption.rst
@@ -52,7 +52,7 @@ Constraints and notes
Design
======
-We add a :c:type:`struct bio_crypt_ctx` to :c:type:`struct bio` that can
+We add a struct bio_crypt_ctx to struct bio that can
represent an encryption context, because we need to be able to pass this
encryption context from the upper layers (like the fs layer) to the
device driver to act upon.
@@ -85,7 +85,7 @@ blk-mq changes, other block layer changes and blk-crypto-fallback
=================================================================
We add a pointer to a ``bi_crypt_context`` and ``keyslot`` to
-:c:type:`struct request`. These will be referred to as the ``crypto fields``
+struct request. These will be referred to as the ``crypto fields``
for the request. This ``keyslot`` is the keyslot into which the
``bi_crypt_context`` has been programmed in the KSM of the ``request_queue``
that this request is being sent to.
@@ -118,7 +118,7 @@ of the algorithm being used adheres to spec and functions correctly).
If a ``request queue``'s inline encryption hardware claimed to support the
encryption context specified with a bio, then it will not be handled by the
``blk-crypto-fallback``. We will eventually reach a point in blk-mq when a
-:c:type:`struct request` needs to be allocated for that bio. At that point,
+struct request needs to be allocated for that bio. At that point,
blk-mq tries to program the encryption context into the ``request_queue``'s
keyslot_manager, and obtain a keyslot, which it stores in its newly added
``keyslot`` field. This keyslot is released when the request is completed.
@@ -188,7 +188,7 @@ keyslots supported by the hardware.
The device driver also needs to tell the KSM how to actually manipulate the
IE hardware in the device to do things like programming the crypto key into
the IE hardware into a particular keyslot. All this is achieved through the
-:c:type:`struct blk_ksm_ll_ops` field in the KSM that the device driver
+struct blk_ksm_ll_ops field in the KSM that the device driver
must fill up after initing the ``blk_keyslot_manager``.
The KSM also handles runtime power management for the device when applicable
diff --git a/Documentation/conf.py b/Documentation/conf.py
index 0a102d57437d..376dd0ddf39c 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -47,9 +47,68 @@ extensions = ['kerneldoc', 'rstFlatTable', 'kernel_include',
#
if major >= 3:
sys.stderr.write('''WARNING: The kernel documentation build process
- does not work correctly with Sphinx v3.0 and above. Expect errors
- in the generated output.
- ''')
+ support for Sphinx v3.0 and above is brand new. Be prepared for
+ possible issues in the generated output.
+ ''')
+ if minor > 0 or patch >= 2:
+ # Sphinx c function parser is more pedantic with regards to type
+ # checking. Due to that, having macros at c:function cause problems.
+ # Those needed to be scaped by using c_id_attributes[] array
+ c_id_attributes = [
+ # GCC Compiler types not parsed by Sphinx:
+ "__restrict__",
+
+ # include/linux/compiler_types.h:
+ "__iomem",
+ "__kernel",
+ "noinstr",
+ "notrace",
+ "__percpu",
+ "__rcu",
+ "__user",
+
+ # include/linux/compiler_attributes.h:
+ "__alias",
+ "__aligned",
+ "__aligned_largest",
+ "__always_inline",
+ "__assume_aligned",
+ "__cold",
+ "__attribute_const__",
+ "__copy",
+ "__pure",
+ "__designated_init",
+ "__visible",
+ "__printf",
+ "__scanf",
+ "__gnu_inline",
+ "__malloc",
+ "__mode",
+ "__no_caller_saved_registers",
+ "__noclone",
+ "__nonstring",
+ "__noreturn",
+ "__packed",
+ "__pure",
+ "__section",
+ "__always_unused",
+ "__maybe_unused",
+ "__used",
+ "__weak",
+ "noinline",
+
+ # include/linux/memblock.h:
+ "__init_memblock",
+ "__meminit",
+
+ # include/linux/init.h:
+ "__init",
+ "__ref",
+
+ # include/linux/linkage.h:
+ "asmlinkage",
+ ]
+
else:
extensions.append('cdomain')
diff --git a/Documentation/core-api/genericirq.rst b/Documentation/core-api/genericirq.rst
index 8f06d885c310..f959c9b53f61 100644
--- a/Documentation/core-api/genericirq.rst
+++ b/Documentation/core-api/genericirq.rst
@@ -419,6 +419,7 @@ functions which are exported.
.. kernel-doc:: kernel/irq/manage.c
.. kernel-doc:: kernel/irq/chip.c
+ :export:
Internal Functions Provided
===========================
@@ -431,6 +432,7 @@ functions.
.. kernel-doc:: kernel/irq/handle.c
.. kernel-doc:: kernel/irq/chip.c
+ :internal:
Credits
=======
diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst
index 4ac53a1363f6..741aa37dc181 100644
--- a/Documentation/core-api/kernel-api.rst
+++ b/Documentation/core-api/kernel-api.rst
@@ -231,12 +231,6 @@ Refer to the file kernel/module.c for more information.
Hardware Interfaces
===================
-Interrupt Handling
-------------------
-
-.. kernel-doc:: kernel/irq/manage.c
- :export:
-
DMA Channels
------------
diff --git a/Documentation/core-api/workqueue.rst b/Documentation/core-api/workqueue.rst
index 00a5ba51e63f..541d31de8926 100644
--- a/Documentation/core-api/workqueue.rst
+++ b/Documentation/core-api/workqueue.rst
@@ -396,3 +396,5 @@ Kernel Inline Documentations Reference
======================================
.. kernel-doc:: include/linux/workqueue.h
+
+.. kernel-doc:: kernel/workqueue.c
diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst
index 640934b6f7b4..a137a0e6d068 100644
--- a/Documentation/core-api/xarray.rst
+++ b/Documentation/core-api/xarray.rst
@@ -475,13 +475,15 @@ or iterations will move the index to the first index in the range.
Each entry will only be returned once, no matter how many indices it
occupies.
-Using xas_next() or xas_prev() with a multi-index xa_state
-is not supported. Using either of these functions on a multi-index entry
-will reveal sibling entries; these should be skipped over by the caller.
-
-Storing ``NULL`` into any index of a multi-index entry will set the entry
-at every index to ``NULL`` and dissolve the tie. Splitting a multi-index
-entry into entries occupying smaller ranges is not yet supported.
+Using xas_next() or xas_prev() with a multi-index xa_state is not
+supported. Using either of these functions on a multi-index entry will
+reveal sibling entries; these should be skipped over by the caller.
+
+Storing ``NULL`` into any index of a multi-index entry will set the
+entry at every index to ``NULL`` and dissolve the tie. A multi-index
+entry can be split into entries occupying smaller ranges by calling
+xas_split_alloc() without the xa_lock held, followed by taking the lock
+and calling xas_split().
Functions and structures
========================
diff --git a/Documentation/dev-tools/kgdb.rst b/Documentation/dev-tools/kgdb.rst
index c908ef4d3f04..77b688e6a254 100644
--- a/Documentation/dev-tools/kgdb.rst
+++ b/Documentation/dev-tools/kgdb.rst
@@ -726,7 +726,7 @@ The kernel debugger is organized into a number of components:
- contains an arch-specific trap catcher which invokes
kgdb_handle_exception() to start kgdb about doing its work
- - translation to and from gdb specific packet format to :c:type:`pt_regs`
+ - translation to and from gdb specific packet format to struct pt_regs
- Registration and unregistration of architecture specific trap
hooks
@@ -846,7 +846,7 @@ invokes a callback in the serial core which in turn uses the callback in
the UART driver.
When using kgdboc with a UART, the UART driver must implement two
-callbacks in the :c:type:`struct uart_ops <uart_ops>`.
+callbacks in the struct uart_ops.
Example from ``drivers/8250.c``::
@@ -875,7 +875,7 @@ kernel when ``CONFIG_KDB_KEYBOARD=y`` is set in the kernel configuration.
The core polled keyboard driver for PS/2 type keyboards is in
``drivers/char/kdb_keyboard.c``. This driver is hooked into the debug core
when kgdboc populates the callback in the array called
-:c:type:`kdb_poll_funcs[]`. The kdb_get_kbd_char() is the top-level
+:c:expr:`kdb_poll_funcs[]`. The kdb_get_kbd_char() is the top-level
function which polls hardware for single character input.
kgdboc and kms
diff --git a/Documentation/dev-tools/kunit/index.rst b/Documentation/dev-tools/kunit/index.rst
index e93606ecfb01..c234a3ab3c34 100644
--- a/Documentation/dev-tools/kunit/index.rst
+++ b/Documentation/dev-tools/kunit/index.rst
@@ -11,6 +11,7 @@ KUnit - Unit Testing for the Linux Kernel
usage
kunit-tool
api/index
+ style
faq
What is KUnit?
diff --git a/Documentation/dev-tools/kunit/style.rst b/Documentation/dev-tools/kunit/style.rst
new file mode 100644
index 000000000000..da1d6f0ed6bc
--- /dev/null
+++ b/Documentation/dev-tools/kunit/style.rst
@@ -0,0 +1,205 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+Test Style and Nomenclature
+===========================
+
+To make finding, writing, and using KUnit tests as simple as possible, it's
+strongly encouraged that they are named and written according to the guidelines
+below. While it's possible to write KUnit tests which do not follow these rules,
+they may break some tooling, may conflict with other tests, and may not be run
+automatically by testing systems.
+
+It's recommended that you only deviate from these guidelines when:
+
+1. Porting tests to KUnit which are already known with an existing name, or
+2. Writing tests which would cause serious problems if automatically run (e.g.,
+ non-deterministically producing false positives or negatives, or taking an
+ extremely long time to run).
+
+Subsystems, Suites, and Tests
+=============================
+
+In order to make tests as easy to find as possible, they're grouped into suites
+and subsystems. A test suite is a group of tests which test a related area of
+the kernel, and a subsystem is a set of test suites which test different parts
+of the same kernel subsystem or driver.
+
+Subsystems
+----------
+
+Every test suite must belong to a subsystem. A subsystem is a collection of one
+or more KUnit test suites which test the same driver or part of the kernel. A
+rule of thumb is that a test subsystem should match a single kernel module. If
+the code being tested can't be compiled as a module, in many cases the subsystem
+should correspond to a directory in the source tree or an entry in the
+MAINTAINERS file. If unsure, follow the conventions set by tests in similar
+areas.
+
+Test subsystems should be named after the code being tested, either after the
+module (wherever possible), or after the directory or files being tested. Test
+subsystems should be named to avoid ambiguity where necessary.
+
+If a test subsystem name has multiple components, they should be separated by
+underscores. *Do not* include "test" or "kunit" directly in the subsystem name
+unless you are actually testing other tests or the kunit framework itself.
+
+Example subsystems could be:
+
+``ext4``
+ Matches the module and filesystem name.
+``apparmor``
+ Matches the module name and LSM name.
+``kasan``
+ Common name for the tool, prominent part of the path ``mm/kasan``
+``snd_hda_codec_hdmi``
+ Has several components (``snd``, ``hda``, ``codec``, ``hdmi``) separated by
+ underscores. Matches the module name.
+
+Avoid names like these:
+
+``linear-ranges``
+ Names should use underscores, not dashes, to separate words. Prefer
+ ``linear_ranges``.
+``qos-kunit-test``
+ As well as using underscores, this name should not have "kunit-test" as a
+ suffix, and ``qos`` is ambiguous as a subsystem name. ``power_qos`` would be a
+ better name.
+``pc_parallel_port``
+ The corresponding module name is ``parport_pc``, so this subsystem should also
+ be named ``parport_pc``.
+
+.. note::
+ The KUnit API and tools do not explicitly know about subsystems. They're
+ simply a way of categorising test suites and naming modules which
+ provides a simple, consistent way for humans to find and run tests. This
+ may change in the future, though.
+
+Suites
+------
+
+KUnit tests are grouped into test suites, which cover a specific area of
+functionality being tested. Test suites can have shared initialisation and
+shutdown code which is run for all tests in the suite.
+Not all subsystems will need to be split into multiple test suites (e.g. simple drivers).
+
+Test suites are named after the subsystem they are part of. If a subsystem
+contains several suites, the specific area under test should be appended to the
+subsystem name, separated by an underscore.
+
+In the event that there are multiple types of test using KUnit within a
+subsystem (e.g., both unit tests and integration tests), they should be put into
+separate suites, with the type of test as the last element in the suite name.
+Unless these tests are actually present, avoid using ``_test``, ``_unittest`` or
+similar in the suite name.
+
+The full test suite name (including the subsystem name) should be specified as
+the ``.name`` member of the ``kunit_suite`` struct, and forms the base for the
+module name (see below).
+
+Example test suites could include:
+
+``ext4_inode``
+ Part of the ``ext4`` subsystem, testing the ``inode`` area.
+``kunit_try_catch``
+ Part of the ``kunit`` implementation itself, testing the ``try_catch`` area.
+``apparmor_property_entry``
+ Part of the ``apparmor`` subsystem, testing the ``property_entry`` area.
+``kasan``
+ The ``kasan`` subsystem has only one suite, so the suite name is the same as
+ the subsystem name.
+
+Avoid names like:
+
+``ext4_ext4_inode``
+ There's no reason to state the subsystem twice.
+``property_entry``
+ The suite name is ambiguous without the subsystem name.
+``kasan_integration_test``
+ Because there is only one suite in the ``kasan`` subsystem, the suite should
+ just be called ``kasan``. There's no need to redundantly add
+ ``integration_test``. Should a separate test suite with, for example, unit
+ tests be added, then that suite could be named ``kasan_unittest`` or similar.
+
+Test Cases
+----------
+
+Individual tests consist of a single function which tests a constrained
+codepath, property, or function. In the test output, individual tests' results
+will show up as subtests of the suite's results.
+
+Tests should be named after what they're testing. This is often the name of the
+function being tested, with a description of the input or codepath being tested.
+As tests are C functions, they should be named and written in accordance with
+the kernel coding style.
+
+.. note::
+ As tests are themselves functions, their names cannot conflict with
+ other C identifiers in the kernel. This may require some creative
+ naming. It's a good idea to make your test functions `static` to avoid
+ polluting the global namespace.
+
+Example test names include:
+
+``unpack_u32_with_null_name``
+ Tests the ``unpack_u32`` function when a NULL name is passed in.
+``test_list_splice``
+ Tests the ``list_splice`` macro. It has the prefix ``test_`` to avoid a
+ name conflict with the macro itself.
+
+
+Should it be necessary to refer to a test outside the context of its test suite,
+the *fully-qualified* name of a test should be the suite name followed by the
+test name, separated by a colon (i.e. ``suite:test``).
+
+Test Kconfig Entries
+====================
+
+Every test suite should be tied to a Kconfig entry.
+
+This Kconfig entry must:
+
+* be named ``CONFIG_<name>_KUNIT_TEST``: where <name> is the name of the test
+ suite.
+* be listed either alongside the config entries for the driver/subsystem being
+ tested, or be under [Kernel Hacking]→[Kernel Testing and Coverage]
+* depend on ``CONFIG_KUNIT``
+* be visible only if ``CONFIG_KUNIT_ALL_TESTS`` is not enabled.
+* have a default value of ``CONFIG_KUNIT_ALL_TESTS``.
+* have a brief description of KUnit in the help text
+
+Unless there's a specific reason not to (e.g. the test is unable to be built as
+a module), Kconfig entries for tests should be tristate.
+
+An example Kconfig entry:
+
+.. code-block:: none
+
+ config FOO_KUNIT_TEST
+ tristate "KUnit test for foo" if !KUNIT_ALL_TESTS
+ depends on KUNIT
+ default KUNIT_ALL_TESTS
+ help
+ This builds unit tests for foo.
+
+ For more information on KUnit and unit tests in general, please refer
+ to the KUnit documentation in Documentation/dev-tools/kunit
+
+ If unsure, say N
+
+
+Test File and Module Names
+==========================
+
+KUnit tests can often be compiled as a module. These modules should be named
+after the test suite, followed by ``_test``. If this is likely to conflict with
+non-KUnit tests, the suffix ``_kunit`` can also be used.
+
+The easiest way of achieving this is to name the file containing the test suite
+``<suite>_test.c`` (or, as above, ``<suite>_kunit.c``). This file should be
+placed next to the code under test.
+
+If the suite name contains some or all of the name of the test's parent
+directory, it may make sense to modify the source filename to reduce redundancy.
+For example, a ``foo_firmware`` suite could be in the ``foo/firmware_test.c``
+file.
diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst
index 3c3fe8b5fecc..961d3ea3ca19 100644
--- a/Documentation/dev-tools/kunit/usage.rst
+++ b/Documentation/dev-tools/kunit/usage.rst
@@ -211,6 +211,11 @@ KUnit test framework.
.. note::
A test case will only be run if it is associated with a test suite.
+``kunit_test_suite(...)`` is a macro which tells the linker to put the specified
+test suite in a special linker section so that it can be run by KUnit either
+after late_init, or when the test module is loaded (depending on whether the
+test was built in or not).
+
For more information on these types of things see the :doc:`api/test`.
Isolating Behavior
diff --git a/Documentation/devicetree/bindings/display/tilcdc/tilcdc.txt b/Documentation/devicetree/bindings/display/tilcdc/tilcdc.txt
index 8b2a71395647..3e64075ac7ec 100644
--- a/Documentation/devicetree/bindings/display/tilcdc/tilcdc.txt
+++ b/Documentation/devicetree/bindings/display/tilcdc/tilcdc.txt
@@ -37,7 +37,7 @@ Optional nodes:
supports a single port with a single endpoint.
- See also Documentation/devicetree/bindings/display/tilcdc/panel.txt and
- Documentation/devicetree/bindings/display/bridge/ti,tfp410.txt for connecting
+ Documentation/devicetree/bindings/display/bridge/ti,tfp410.yaml for connecting
tfp410 DVI encoder or lcd panel to lcdc
[1] There is an errata about AM335x color wiring. For 16-bit color mode
diff --git a/Documentation/devicetree/bindings/mailbox/arm,mhu.yaml b/Documentation/devicetree/bindings/mailbox/arm,mhu.yaml
new file mode 100644
index 000000000000..d43791a2dde7
--- /dev/null
+++ b/Documentation/devicetree/bindings/mailbox/arm,mhu.yaml
@@ -0,0 +1,135 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mailbox/arm,mhu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM MHU Mailbox Controller
+
+maintainers:
+ - Jassi Brar <jaswinder.singh@linaro.org>
+
+description: |
+ The ARM's Message-Handling-Unit (MHU) is a mailbox controller that has 3
+ independent channels/links to communicate with remote processor(s). MHU links
+ are hardwired on a platform. A link raises interrupt for any received data.
+ However, there is no specified way of knowing if the sent data has been read
+ by the remote. This driver assumes the sender polls STAT register and the
+ remote clears it after having read the data. The last channel is specified to
+ be a 'Secure' resource, hence can't be used by Linux running NS.
+
+ The MHU hardware also allows operations in doorbell mode. The MHU drives the
+ interrupt signal using a 32-bit register, with all 32-bits logically ORed
+ together. It provides a set of registers to enable software to set, clear and
+ check the status of each of the bits of this register independently. The use
+ of 32 bits per interrupt line enables software to provide more information
+ about the source of the interrupt. For example, each bit of the register can
+ be associated with a type of event that can contribute to raising the
+ interrupt. Each of the 32-bits can be used as "doorbell" to alert the remote
+ processor.
+
+# We need a select here so we don't match all nodes with 'arm,primecell'
+select:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - arm,mhu
+ - arm,mhu-doorbell
+ required:
+ - compatible
+
+properties:
+ compatible:
+ oneOf:
+ - description: Data transfer mode
+ items:
+ - const: arm,mhu
+ - const: arm,primecell
+
+ - description: Doorbell mode
+ items:
+ - const: arm,mhu-doorbell
+ - const: arm,primecell
+
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ items:
+ - description: low-priority non-secure
+ - description: high-priority non-secure
+ - description: Secure
+ maxItems: 3
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: apb_pclk
+
+ '#mbox-cells':
+ description: |
+ Set to 1 in data transfer mode and represents index of the channel.
+ Set to 2 in doorbell mode and represents index of the channel and doorbell
+ number.
+ enum: [ 1, 2 ]
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - '#mbox-cells'
+
+additionalProperties: false
+
+examples:
+ # Data transfer mode.
+ - |
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ mhuA: mailbox@2b1f0000 {
+ #mbox-cells = <1>;
+ compatible = "arm,mhu", "arm,primecell";
+ reg = <0 0x2b1f0000 0 0x1000>;
+ interrupts = <0 36 4>, /* LP-NonSecure */
+ <0 35 4>, /* HP-NonSecure */
+ <0 37 4>; /* Secure */
+ clocks = <&clock 0 2 1>;
+ clock-names = "apb_pclk";
+ };
+
+ mhu_client_scb: scb@2e000000 {
+ compatible = "fujitsu,mb86s70-scb-1.0";
+ reg = <0 0x2e000000 0 0x4000>;
+ mboxes = <&mhuA 1>; /* HP-NonSecure */
+ };
+ };
+
+ # Doorbell mode.
+ - |
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ mhuB: mailbox@2b2f0000 {
+ #mbox-cells = <2>;
+ compatible = "arm,mhu-doorbell", "arm,primecell";
+ reg = <0 0x2b2f0000 0 0x1000>;
+ interrupts = <0 36 4>, /* LP-NonSecure */
+ <0 35 4>, /* HP-NonSecure */
+ <0 37 4>; /* Secure */
+ clocks = <&clock 0 2 1>;
+ clock-names = "apb_pclk";
+ };
+
+ mhu_client_scpi: scpi@2f000000 {
+ compatible = "arm,scpi";
+ reg = <0 0x2f000000 0 0x200>;
+ mboxes = <&mhuB 1 4>; /* HP-NonSecure, 5th doorbell */
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mailbox/arm-mhu.txt b/Documentation/devicetree/bindings/mailbox/arm-mhu.txt
deleted file mode 100644
index 4971f03f0b33..000000000000
--- a/Documentation/devicetree/bindings/mailbox/arm-mhu.txt
+++ /dev/null
@@ -1,43 +0,0 @@
-ARM MHU Mailbox Driver
-======================
-
-The ARM's Message-Handling-Unit (MHU) is a mailbox controller that has
-3 independent channels/links to communicate with remote processor(s).
- MHU links are hardwired on a platform. A link raises interrupt for any
-received data. However, there is no specified way of knowing if the sent
-data has been read by the remote. This driver assumes the sender polls
-STAT register and the remote clears it after having read the data.
-The last channel is specified to be a 'Secure' resource, hence can't be
-used by Linux running NS.
-
-Mailbox Device Node:
-====================
-
-Required properties:
---------------------
-- compatible: Shall be "arm,mhu" & "arm,primecell"
-- reg: Contains the mailbox register address range (base
- address and length)
-- #mbox-cells Shall be 1 - the index of the channel needed.
-- interrupts: Contains the interrupt information corresponding to
- each of the 3 links of MHU.
-
-Example:
---------
-
- mhu: mailbox@2b1f0000 {
- #mbox-cells = <1>;
- compatible = "arm,mhu", "arm,primecell";
- reg = <0 0x2b1f0000 0x1000>;
- interrupts = <0 36 4>, /* LP-NonSecure */
- <0 35 4>, /* HP-NonSecure */
- <0 37 4>; /* Secure */
- clocks = <&clock 0 2 1>;
- clock-names = "apb_pclk";
- };
-
- mhu_client: scb@2e000000 {
- compatible = "fujitsu,mb86s70-scb-1.0";
- reg = <0 0x2e000000 0x4000>;
- mboxes = <&mhu 1>; /* HP-NonSecure */
- };
diff --git a/Documentation/devicetree/bindings/mailbox/omap-mailbox.txt b/Documentation/devicetree/bindings/mailbox/omap-mailbox.txt
index 35c3f56b7f7b..5fe80c1c19fc 100644
--- a/Documentation/devicetree/bindings/mailbox/omap-mailbox.txt
+++ b/Documentation/devicetree/bindings/mailbox/omap-mailbox.txt
@@ -69,7 +69,7 @@ The following are mandatory properties for the K3 AM65x and J721E SoCs only:
the interrupt routes between the IP and the main GIC
controllers. See the following binding for additional
details,
- Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.txt
+ Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.yaml
Child Nodes:
============
diff --git a/Documentation/devicetree/bindings/media/i2c/tvp5150.txt b/Documentation/devicetree/bindings/media/i2c/tvp5150.txt
index 6c88ce858d08..719b2995dc17 100644
--- a/Documentation/devicetree/bindings/media/i2c/tvp5150.txt
+++ b/Documentation/devicetree/bindings/media/i2c/tvp5150.txt
@@ -56,7 +56,7 @@ Optional Connector Properties:
instead of using the autodetection mechnism. Please look at
[1] for more information.
-[1] Documentation/devicetree/bindings/display/connector/analog-tv-connector.txt.
+[1] Documentation/devicetree/bindings/display/connector/analog-tv-connector.yaml.
Example - three input sources:
#include <dt-bindings/display/sdtv-standards.h>
diff --git a/Documentation/devicetree/bindings/mips/ingenic/devices.yaml b/Documentation/devicetree/bindings/mips/ingenic/devices.yaml
index 83c86cbe4716..dc21b4630c25 100644
--- a/Documentation/devicetree/bindings/mips/ingenic/devices.yaml
+++ b/Documentation/devicetree/bindings/mips/ingenic/devices.yaml
@@ -47,4 +47,9 @@ properties:
items:
- const: yna,cu1830-neo
- const: ingenic,x1830
+
+ - description: YSH & ATIL General Board, CU2000 Module with Neo Backplane
+ items:
+ - const: yna,cu2000-neo
+ - const: ingenic,x2000e
...
diff --git a/Documentation/devicetree/bindings/mtd/nand-controller.yaml b/Documentation/devicetree/bindings/mtd/nand-controller.yaml
index 274bbe6a365e..b29050fd7470 100644
--- a/Documentation/devicetree/bindings/mtd/nand-controller.yaml
+++ b/Documentation/devicetree/bindings/mtd/nand-controller.yaml
@@ -55,6 +55,37 @@ patternProperties:
$ref: /schemas/types.yaml#/definitions/string
enum: [none, soft, hw, hw_syndrome, hw_oob_first, on-die]
+ nand-ecc-engine:
+ allOf:
+ - $ref: /schemas/types.yaml#/definitions/phandle
+ description: |
+ A phandle on the hardware ECC engine if any. There are
+ basically three possibilities:
+ 1/ The ECC engine is part of the NAND controller, in this
+ case the phandle should reference the parent node.
+ 2/ The ECC engine is part of the NAND part (on-die), in this
+ case the phandle should reference the node itself.
+ 3/ The ECC engine is external, in this case the phandle should
+ reference the specific ECC engine node.
+
+ nand-use-soft-ecc-engine:
+ type: boolean
+ description: Use a software ECC engine.
+
+ nand-no-ecc-engine:
+ type: boolean
+ description: Do not use any ECC correction.
+
+ nand-ecc-placement:
+ allOf:
+ - $ref: /schemas/types.yaml#/definitions/string
+ - enum: [ oob, interleaved ]
+ description:
+ Location of the ECC bytes. This location is unknown by default
+ but can be explicitly set to "oob", if all ECC bytes are
+ known to be stored in the OOB area, or "interleaved" if ECC
+ bytes will be interleaved with regular data in the main area.
+
nand-ecc-algo:
description:
Desired ECC algorithm.
diff --git a/Documentation/devicetree/bindings/power/reset/ocelot-reset.txt b/Documentation/devicetree/bindings/power/reset/ocelot-reset.txt
index 1b4213eb3473..4d530d815484 100644
--- a/Documentation/devicetree/bindings/power/reset/ocelot-reset.txt
+++ b/Documentation/devicetree/bindings/power/reset/ocelot-reset.txt
@@ -1,10 +1,13 @@
Microsemi Ocelot reset controller
The DEVCPU_GCB:CHIP_REGS have a SOFT_RST register that can be used to reset the
-SoC MIPS core.
+SoC core.
+
+The reset registers are both present in the MSCC vcoreiii MIPS and
+microchip Sparx5 armv8 SoC's.
Required Properties:
- - compatible: "mscc,ocelot-chip-reset"
+ - compatible: "mscc,ocelot-chip-reset" or "microchip,sparx5-chip-reset"
Example:
reset@1070008 {
diff --git a/Documentation/devicetree/bindings/power/reset/reboot-mode.txt b/Documentation/devicetree/bindings/power/reset/reboot-mode.txt
deleted file mode 100644
index de34f27d509e..000000000000
--- a/Documentation/devicetree/bindings/power/reset/reboot-mode.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-Generic reboot mode core map driver
-
-This driver get reboot mode arguments and call the write
-interface to store the magic value in special register
-or ram. Then the bootloader can read it and take different
-action according to the argument stored.
-
-All mode properties are vendor specific, it is a indication to tell
-the bootloader what to do when the system reboots, and should be named
-as mode-xxx = <magic> (xxx is mode name, magic should be a none-zero value).
-
-For example modes common on Android platform:
-- mode-normal: Normal reboot mode, system reboot with command "reboot".
-- mode-recovery: Android Recovery mode, it is a mode to format the device or update a new image.
-- mode-bootloader: Android fastboot mode, it's a mode to re-flash partitions on the Android based device.
-- mode-loader: A bootloader mode, it's a mode used to download image on Rockchip platform,
- usually used in development.
-
-Example:
- reboot-mode {
- mode-normal = <BOOT_NORMAL>;
- mode-recovery = <BOOT_RECOVERY>;
- mode-bootloader = <BOOT_FASTBOOT>;
- mode-loader = <BOOT_BL_DOWNLOAD>;
- }
diff --git a/Documentation/devicetree/bindings/power/reset/reboot-mode.yaml b/Documentation/devicetree/bindings/power/reset/reboot-mode.yaml
new file mode 100644
index 000000000000..a6c91026d4cc
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/reset/reboot-mode.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/power/reset/reboot-mode.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Generic reboot mode core map
+
+maintainers:
+ - Andy Yan <andy.yan@rock-chips.com>
+
+description: |
+ This driver get reboot mode arguments and call the write
+ interface to store the magic value in special register
+ or ram. Then the bootloader can read it and take different
+ action according to the argument stored.
+
+ All mode properties are vendor specific, it is a indication to tell
+ the bootloader what to do when the system reboots, and should be named
+ as mode-xxx = <magic> (xxx is mode name, magic should be a non-zero value).
+
+ For example, modes common Android platform are:
+ - normal: Normal reboot mode, system reboot with command "reboot".
+ - recovery: Android Recovery mode, it is a mode to format the device or update a new image.
+ - bootloader: Android fastboot mode, it's a mode to re-flash partitions on the Android based device.
+ - loader: A bootloader mode, it's a mode used to download image on Rockchip platform,
+ usually used in development.
+
+properties:
+ mode-normal:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: |
+ Default value to set on a reboot if no command was provided.
+
+patternProperties:
+ "^mode-.*$":
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+examples:
+ - |
+ reboot-mode {
+ mode-normal = <0>;
+ mode-recovery = <1>;
+ mode-bootloader = <2>;
+ mode-loader = <3>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/power/supply/battery.yaml b/Documentation/devicetree/bindings/power/supply/battery.yaml
index 932b736ce5c0..0c7e2e44793b 100644
--- a/Documentation/devicetree/bindings/power/supply/battery.yaml
+++ b/Documentation/devicetree/bindings/power/supply/battery.yaml
@@ -82,6 +82,27 @@ properties:
An array containing the temperature in degree Celsius,
for each of the battery capacity lookup table.
+ operating-range-celsius:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ description: operating temperature range of a battery
+ items:
+ - description: minimum temperature at which battery can operate
+ - description: maximum temperature at which battery can operate
+
+ ambient-celsius:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ description: safe range of ambient temperature
+ items:
+ - description: alert when ambient temperature is lower than this value
+ - description: alert when ambient temperature is higher than this value
+
+ alert-celsius:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ description: safe range of battery temperature
+ items:
+ - description: alert when battery temperature is lower than this value
+ - description: alert when battery temperature is higher than this value
+
required:
- compatible
@@ -130,6 +151,9 @@ examples:
/* table for 10 degree Celsius */
ocv-capacity-table-2 = <4250000 100>, <4200000 95>, <4185000 90>;
resistance-temp-table = <20 100>, <10 90>, <0 80>, <(-10) 60>;
+ operating-range-celsius = <(-30) 50>;
+ ambient-celsius = <(-5) 50>;
+ alert-celsius = <0 40>;
};
charger@11 {
diff --git a/Documentation/devicetree/bindings/power/supply/bq25890.txt b/Documentation/devicetree/bindings/power/supply/bq25890.txt
index 3b4c69a7fa70..805040c6fff9 100644
--- a/Documentation/devicetree/bindings/power/supply/bq25890.txt
+++ b/Documentation/devicetree/bindings/power/supply/bq25890.txt
@@ -33,6 +33,10 @@ Optional properties:
- ti,thermal-regulation-threshold: integer, temperature above which the charge
current is lowered, to avoid overheating (in degrees Celsius). If omitted,
the default setting will be used (120 degrees);
+- ti,ibatcomp-micro-ohms: integer, value of a resistor in series with
+ the battery;
+- ti,ibatcomp-clamp-microvolt: integer, maximum charging voltage adjustment due
+ to expected voltage drop on in-series resistor;
Example:
diff --git a/Documentation/devicetree/bindings/power/supply/bq25980.yaml b/Documentation/devicetree/bindings/power/supply/bq25980.yaml
new file mode 100644
index 000000000000..f6b3dd4093ca
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/supply/bq25980.yaml
@@ -0,0 +1,114 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2020 Texas Instruments Incorporated
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/power/supply/bq25980.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: TI BQ25980 Flash Charger
+
+maintainers:
+ - Dan Murphy <dmurphy@ti.com>
+ - Ricardo Rivera-Matos <r-rivera-matos@ti.com>
+
+description: |
+ The BQ25980, BQ25975, and BQ25960 are a series of flash chargers intended
+ for use in high-power density portable electronics. These inductorless
+ switching chargers can provide over 97% efficiency by making use of the
+ switched capacitor architecture.
+
+allOf:
+ - $ref: power-supply.yaml#
+
+properties:
+ compatible:
+ enum:
+ - ti,bq25980
+ - ti,bq25975
+ - ti,bq25960
+
+ reg:
+ maxItems: 1
+
+ ti,watchdog-timeout-ms:
+ description: |
+ Watchdog timer in milli seconds. 0 disables the watchdog.
+ default: 0
+ minimum: 0
+ maximum: 300000
+ enum: [ 0, 5000, 10000, 50000, 300000]
+
+ ti,sc-ovp-limit-microvolt:
+ description: |
+ Minimum input voltage limit in micro volts with a when the charger is in
+ switch cap mode. 100000 micro volt step.
+ default: 17800000
+ minimum: 14000000
+ maximum: 22000000
+
+ ti,sc-ocp-limit-microamp:
+ description: |
+ Maximum input current limit in micro amps with a 100000 micro amp step.
+ minimum: 100000
+ maximum: 3300000
+
+ ti,bypass-ovp-limit-microvolt:
+ description: |
+ Minimum input voltage limit in micro volts with a when the charger is in
+ switch cap mode. 50000 micro volt step.
+ minimum: 7000000
+ maximum: 12750000
+
+ ti,bypass-ocp-limit-microamp:
+ description: |
+ Maximum input current limit in micro amps with a 100000 micro amp step.
+ minimum: 100000
+ maximum: 3300000
+
+ ti,bypass-enable:
+ type: boolean
+ description: Enables bypass mode at boot time
+
+ interrupts:
+ description: |
+ Indicates that the device state has changed.
+
+ monitored-battery:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: phandle to the battery node being monitored
+
+required:
+ - compatible
+ - reg
+ - monitored-battery
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ bat: battery {
+ compatible = "simple-battery";
+ constant-charge-current-max-microamp = <4000000>;
+ constant-charge-voltage-max-microvolt = <8400000>;
+ precharge-current-microamp = <160000>;
+ charge-term-current-microamp = <160000>;
+ };
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ i2c0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ bq25980: charger@65 {
+ compatible = "ti,bq25980";
+ reg = <0x65>;
+ interrupt-parent = <&gpio1>;
+ interrupts = <16 IRQ_TYPE_EDGE_FALLING>;
+ ti,watchdog-timer = <0>;
+ ti,sc-ocp-limit-microamp = <2000000>;
+ ti,sc-ovp-limit-microvolt = <17800000>;
+ monitored-battery = <&bat>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/power/supply/bq27xxx.yaml b/Documentation/devicetree/bindings/power/supply/bq27xxx.yaml
index 82f682705f44..45beefccf31a 100644
--- a/Documentation/devicetree/bindings/power/supply/bq27xxx.yaml
+++ b/Documentation/devicetree/bindings/power/supply/bq27xxx.yaml
@@ -51,6 +51,7 @@ properties:
- ti,bq27621
- ti,bq27z561
- ti,bq28z610
+ - ti,bq34z100
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/power/supply/charger-manager.txt b/Documentation/devicetree/bindings/power/supply/charger-manager.txt
index ec4fe9de3137..b5ae9061b7a0 100644
--- a/Documentation/devicetree/bindings/power/supply/charger-manager.txt
+++ b/Documentation/devicetree/bindings/power/supply/charger-manager.txt
@@ -3,24 +3,32 @@ charger-manager bindings
Required properties :
- compatible : "charger-manager"
- - <>-supply : for regulator consumer
- - cm-num-chargers : number of chargers
+ - <>-supply : for regulator consumer, named according to cm-regulator-name
- cm-chargers : name of chargers
- cm-fuel-gauge : name of battery fuel gauge
- subnode <regulator> :
- cm-regulator-name : name of charger regulator
- subnode <cable> :
- - cm-cable-name : name of charger cable
+ - cm-cable-name : name of charger cable - one of USB, USB-HOST,
+ SDP, DCP, CDP, ACA, FAST-CHARGER, SLOW-CHARGER, WPT,
+ PD, DOCK, JIG, or MECHANICAL
- cm-cable-extcon : name of extcon dev
(optional) - cm-cable-min : minimum current of cable
(optional) - cm-cable-max : maximum current of cable
Optional properties :
- cm-name : charger manager's name (default : "battery")
- - cm-poll-mode : polling mode (enum polling_modes)
- - cm-poll-interval : polling interval
- - cm-battery-stat : battery status (enum data_source)
- - cm-fullbatt-* : data for full battery checking
+ - cm-poll-mode : polling mode - 0 for disabled, 1 for always, 2 for when
+ external power is connected, or 3 for when charging. If not present,
+ then polling is disabled
+ - cm-poll-interval : polling interval (in ms)
+ - cm-battery-stat : battery status - 0 for battery always present, 1 for no
+ battery, 2 to check presence via fuel gauge, or 3 to check presence
+ via charger
+ - cm-fullbatt-vchkdrop-volt : voltage drop (in uV) before restarting charging
+ - cm-fullbatt-voltage : voltage (in uV) of full battery
+ - cm-fullbatt-soc : state of charge to consider as full battery
+ - cm-fullbatt-capacity : capcity (in uAh) to consider as full battery
- cm-thermal-zone : name of external thermometer's thermal zone
- cm-battery-* : threshold battery temperature for charging
-cold : critical cold temperature of battery for charging
@@ -29,6 +37,10 @@ Optional properties :
-temp-diff : temperature difference to allow recharging
- cm-dis/charging-max = limits of charging duration
+Deprecated properties:
+ - cm-num-chargers
+ - cm-fullbatt-vchkdrop-ms
+
Example :
charger-manager@0 {
compatible = "charger-manager";
@@ -39,13 +51,11 @@ Example :
cm-poll-mode = <1>;
cm-poll-interval = <30000>;
- cm-fullbatt-vchkdrop-ms = <30000>;
cm-fullbatt-vchkdrop-volt = <150000>;
cm-fullbatt-soc = <100>;
cm-battery-stat = <3>;
- cm-num-chargers = <3>;
cm-chargers = "charger0", "charger1", "charger2";
cm-fuel-gauge = "fuelgauge0";
@@ -71,7 +81,7 @@ Example :
cm-cable-max = <500000>;
};
cable@1 {
- cm-cable-name = "TA";
+ cm-cable-name = "SDP";
cm-cable-extcon = "extcon-dev.0";
cm-cable-min = <650000>;
cm-cable-max = <675000>;
diff --git a/Documentation/devicetree/bindings/power/supply/gpio-charger.yaml b/Documentation/devicetree/bindings/power/supply/gpio-charger.yaml
index 6244b8ee9402..89f8e2bcb2d7 100644
--- a/Documentation/devicetree/bindings/power/supply/gpio-charger.yaml
+++ b/Documentation/devicetree/bindings/power/supply/gpio-charger.yaml
@@ -39,6 +39,25 @@ properties:
maxItems: 1
description: GPIO indicating the charging status
+ charge-current-limit-gpios:
+ minItems: 1
+ maxItems: 32
+ description: GPIOs used for current limiting
+
+ charge-current-limit-mapping:
+ description: List of tuples with current in uA and a GPIO bitmap (in
+ this order). The tuples must be provided in descending order of the
+ current limit.
+ $ref: /schemas/types.yaml#/definitions/uint32-matrix
+ items:
+ items:
+ - description:
+ Current limit in uA
+ - description:
+ Encoded GPIO setting. Bit 0 represents last GPIO from the
+ charge-current-limit-gpios property. Bit 1 second to last
+ GPIO and so on.
+
required:
- compatible
@@ -47,6 +66,12 @@ anyOf:
- gpios
- required:
- charge-status-gpios
+ - required:
+ - charge-current-limit-gpios
+
+dependencies:
+ charge-current-limit-gpios: [ charge-current-limit-mapping ]
+ charge-current-limit-mapping: [ charge-current-limit-gpios ]
additionalProperties: false
@@ -60,4 +85,10 @@ examples:
gpios = <&gpd 28 GPIO_ACTIVE_LOW>;
charge-status-gpios = <&gpc 27 GPIO_ACTIVE_LOW>;
+
+ charge-current-limit-gpios = <&gpioA 11 GPIO_ACTIVE_HIGH>,
+ <&gpioA 12 GPIO_ACTIVE_HIGH>;
+ charge-current-limit-mapping = <2500000 0x00>, // 2.5 A => both GPIOs low
+ <700000 0x01>, // 700 mA => GPIO A.12 high
+ <0 0x02>; // 0 mA => GPIO A.11 high
};
diff --git a/Documentation/devicetree/bindings/power/supply/ingenic,battery.txt b/Documentation/devicetree/bindings/power/supply/ingenic,battery.txt
deleted file mode 100644
index 66430bf73815..000000000000
--- a/Documentation/devicetree/bindings/power/supply/ingenic,battery.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-* Ingenic JZ47xx battery bindings
-
-Required properties:
-
-- compatible: Must be "ingenic,jz4740-battery".
-- io-channels: phandle and IIO specifier pair to the IIO device.
- Format described in iio-bindings.txt.
-- monitored-battery: phandle to a "simple-battery" compatible node.
-
-The "monitored-battery" property must be a phandle to a node using the format
-described in battery.txt, with the following properties being required:
-
-- voltage-min-design-microvolt: Drained battery voltage.
-- voltage-max-design-microvolt: Fully charged battery voltage.
-
-Example:
-
-#include <dt-bindings/iio/adc/ingenic,adc.h>
-
-simple_battery: battery {
- compatible = "simple-battery";
- voltage-min-design-microvolt = <3600000>;
- voltage-max-design-microvolt = <4200000>;
-};
-
-ingenic_battery {
- compatible = "ingenic,jz4740-battery";
- io-channels = <&adc INGENIC_ADC_BATTERY>;
- io-channel-names = "battery";
- monitored-battery = <&simple_battery>;
-};
diff --git a/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml b/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml
new file mode 100644
index 000000000000..867e3e6b7e80
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2019-2020 Artur Rojek
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/power/supply/ingenic,battery.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Ingenic JZ47xx battery bindings
+
+maintainers:
+ - Artur Rojek <contact@artur-rojek.eu>
+
+properties:
+ compatible:
+ oneOf:
+ - const: ingenic,jz4740-battery
+ - items:
+ - enum:
+ - ingenic,jz4725b-battery
+ - ingenic,jz4770-battery
+ - const: ingenic,jz4740-battery
+
+ io-channels:
+ maxItems: 1
+
+ io-channel-names:
+ const: battery
+
+ monitored-battery:
+ description: >
+ phandle to a "simple-battery" compatible node.
+
+ This property must be a phandle to a node using the format described
+ in battery.yaml, with the following properties being required:
+ - voltage-min-design-microvolt: drained battery voltage,
+ - voltage-max-design-microvolt: fully charged battery voltage.
+
+required:
+ - compatible
+ - io-channels
+ - io-channel-names
+ - monitored-battery
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/iio/adc/ingenic,adc.h>
+
+ simple_battery: battery {
+ compatible = "simple-battery";
+ voltage-min-design-microvolt = <3600000>;
+ voltage-max-design-microvolt = <4200000>;
+ };
+
+ ingenic-battery {
+ compatible = "ingenic,jz4740-battery";
+ io-channels = <&adc INGENIC_ADC_BATTERY>;
+ io-channel-names = "battery";
+ monitored-battery = <&simple_battery>;
+ };
diff --git a/Documentation/devicetree/bindings/power/supply/max17040_battery.txt b/Documentation/devicetree/bindings/power/supply/max17040_battery.txt
index 4e0186b8380f..c802f664b508 100644
--- a/Documentation/devicetree/bindings/power/supply/max17040_battery.txt
+++ b/Documentation/devicetree/bindings/power/supply/max17040_battery.txt
@@ -2,7 +2,9 @@ max17040_battery
~~~~~~~~~~~~~~~~
Required properties :
- - compatible : "maxim,max17040" or "maxim,max77836-battery"
+ - compatible : "maxim,max17040", "maxim,max17041", "maxim,max17043",
+ "maxim,max17044", "maxim,max17048", "maxim,max17049",
+ "maxim,max17058", "maxim,max17059" or "maxim,max77836-battery"
- reg: i2c slave address
Optional properties :
@@ -11,6 +13,15 @@ Optional properties :
generated. Can be configured from 1 up to 32
(%). If skipped the power up default value of
4 (%) will be used.
+- maxim,double-soc : Certain devices return double the capacity.
+ Specify this boolean property to divide the
+ reported value in 2 and thus normalize it.
+ SOC == State of Charge == Capacity.
+- maxim,rcomp : A value to compensate readings for various
+ battery chemistries and operating temperatures.
+ max17040,41 have 2 byte rcomp, default to
+ 0x97 0x00. All other devices have one byte
+ rcomp, default to 0x97.
- interrupts : Interrupt line see Documentation/devicetree/
bindings/interrupt-controller/interrupts.txt
- wakeup-source : This device has wakeup capabilities. Use this
@@ -31,3 +42,11 @@ Example:
interrupts = <2 IRQ_TYPE_EDGE_FALLING>;
wakeup-source;
};
+
+ battery-fuel-gauge@36 {
+ compatible = "maxim,max17048";
+ reg = <0x36>;
+ maxim,rcomp = /bits/ 8 <0x56>;
+ maxim,alert-low-soc-level = <10>;
+ maxim,double-soc;
+ };
diff --git a/Documentation/devicetree/bindings/power/supply/summit,smb347-charger.yaml b/Documentation/devicetree/bindings/power/supply/summit,smb347-charger.yaml
new file mode 100644
index 000000000000..193a23af2007
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/supply/summit,smb347-charger.yaml
@@ -0,0 +1,152 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/power/supply/summit,smb347-charger.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Battery charger driver for SMB345, SMB347 and SMB358
+
+maintainers:
+ - David Heidelberg <david@ixit.cz>
+ - Dmitry Osipenko <digetx@gmail.com>
+
+properties:
+ compatible:
+ enum:
+ - summit,smb345
+ - summit,smb347
+ - summit,smb358
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ monitored-battery:
+ description: phandle to the battery node
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+ summit,enable-usb-charging:
+ type: boolean
+ description: Enable charging through USB.
+
+ summit,enable-otg-charging:
+ type: boolean
+ description: Provide power for USB OTG
+
+ summit,enable-mains-charging:
+ type: boolean
+ description: Enable charging through mains
+
+ summit,enable-charge-control:
+ description: Enable charging control
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum:
+ - 0 # SMB3XX_CHG_ENABLE_SW SW (I2C interface)
+ - 1 # SMB3XX_CHG_ENABLE_PIN_ACTIVE_LOW Pin control (Active Low)
+ - 2 # SMB3XX_CHG_ENABLE_PIN_ACTIVE_HIGH Pin control (Active High)
+
+ summit,fast-voltage-threshold-microvolt:
+ description: Voltage threshold to transit to fast charge mode (in uV)
+ minimum: 2400000
+ maximum: 3000000
+
+ summit,mains-current-limit-microamp:
+ description: Maximum input current from AC/DC input (in uA)
+
+ summit,usb-current-limit-microamp:
+ description: Maximum input current from USB input (in uA)
+
+ summit,charge-current-compensation-microamp:
+ description: Charge current compensation (in uA)
+
+ summit,chip-temperature-threshold-celsius:
+ description: Chip temperature for thermal regulation in °C.
+ enum: [100, 110, 120, 130]
+
+ summit,soft-compensation-method:
+ description: Soft temperature limit compensation method
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum:
+ - 0 # SMB3XX_SOFT_TEMP_COMPENSATE_NONE Compensation none
+ - 1 # SMB3XX_SOFT_TEMP_COMPENSATE_CURRENT Current compensation
+ - 2 # SMB3XX_SOFT_TEMP_COMPENSATE_VOLTAGE Voltage compensation
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ enum:
+ - summit,smb345
+ - summit,smb358
+
+ then:
+ properties:
+ summit,mains-current-limit-microamp:
+ enum: [ 300000, 500000, 700000, 1000000,
+ 1500000, 1800000, 2000000]
+
+ summit,usb-current-limit-microamp:
+ enum: [ 300000, 500000, 700000, 1000000,
+ 1500000, 1800000, 2000000]
+
+ summit,charge-current-compensation-microamp:
+ enum: [200000, 450000, 600000, 900000]
+
+ else:
+ properties:
+ summit,mains-current-limit-microamp:
+ enum: [ 300000, 500000, 700000, 900000, 1200000,
+ 1500000, 1800000, 2000000, 2200000, 2500000]
+
+ summit,usb-current-limit-microamp:
+ enum: [ 300000, 500000, 700000, 900000, 1200000,
+ 1500000, 1800000, 2000000, 2200000, 2500000]
+
+ summit,charge-current-compensation-microamp:
+ enum: [250000, 700000, 900000, 1200000]
+
+required:
+ - compatible
+ - reg
+
+anyOf:
+ - required:
+ - summit,enable-usb-charging
+ - required:
+ - summit,enable-otg-charging
+ - required:
+ - summit,enable-mains-charging
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/power/summit,smb347-charger.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ charger@7f {
+ compatible = "summit,smb347";
+ reg = <0x7f>;
+
+ summit,enable-charge-control = <SMB3XX_CHG_ENABLE_PIN_ACTIVE_HIGH>;
+ summit,chip-temperature-threshold-celsius = <110>;
+ summit,mains-current-limit-microamp = <2000000>;
+ summit,usb-current-limit-microamp = <500000>;
+ summit,enable-usb-charging;
+ summit,enable-mains-charging;
+
+ monitored-battery = <&battery>;
+ };
+ };
+
+ battery: battery-cell {
+ compatible = "simple-battery";
+ constant-charge-current-max-microamp = <1800000>;
+ operating-range-celsius = <0 45>;
+ alert-celsius = <3 42>;
+ };
diff --git a/Documentation/devicetree/bindings/pwm/google,cros-ec-pwm.yaml b/Documentation/devicetree/bindings/pwm/google,cros-ec-pwm.yaml
index 41ece1d85315..4cfbffd8414a 100644
--- a/Documentation/devicetree/bindings/pwm/google,cros-ec-pwm.yaml
+++ b/Documentation/devicetree/bindings/pwm/google,cros-ec-pwm.yaml
@@ -14,7 +14,7 @@ description: |
Google's ChromeOS EC PWM is a simple PWM attached to the Embedded Controller
(EC) and controlled via a host-command interface.
An EC PWM node should be only found as a sub-node of the EC node (see
- Documentation/devicetree/bindings/mfd/cros-ec.txt).
+ Documentation/devicetree/bindings/mfd/google,cros-ec.yaml).
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml
index 468d658ce3e7..2684f22a1d85 100644
--- a/Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml
+++ b/Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml
@@ -20,7 +20,7 @@ description: |
present and this subnode may contain children that designate regulator
resources.
- Refer to Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.txt
+ Refer to Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml
for information on the regulator subnodes that can exist under the
rpm_requests.
diff --git a/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml b/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml
index c84e656afb0a..3b9143af2c7c 100644
--- a/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml
+++ b/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml
@@ -13,7 +13,7 @@ description: |
Google's ChromeOS EC codec is a digital mic codec provided by the
Embedded Controller (EC) and is controlled via a host-command interface.
An EC codec node should only be found as a sub-node of the EC node (see
- Documentation/devicetree/bindings/mfd/cros-ec.txt).
+ Documentation/devicetree/bindings/mfd/google,cros-ec.yaml).
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/thermal/allwinner,sun8i-a83t-ths.yaml b/Documentation/devicetree/bindings/thermal/allwinner,sun8i-a83t-ths.yaml
index 44ba6765697d..31edd051295a 100644
--- a/Documentation/devicetree/bindings/thermal/allwinner,sun8i-a83t-ths.yaml
+++ b/Documentation/devicetree/bindings/thermal/allwinner,sun8i-a83t-ths.yaml
@@ -17,6 +17,7 @@ properties:
- allwinner,sun8i-h3-ths
- allwinner,sun8i-r40-ths
- allwinner,sun50i-a64-ths
+ - allwinner,sun50i-a100-ths
- allwinner,sun50i-h5-ths
- allwinner,sun50i-h6-ths
@@ -61,7 +62,9 @@ allOf:
properties:
compatible:
contains:
- const: allwinner,sun50i-h6-ths
+ enum:
+ - allwinner,sun50i-a100-ths
+ - allwinner,sun50i-h6-ths
then:
properties:
@@ -103,6 +106,7 @@ allOf:
- const: allwinner,sun8i-h3-ths
- const: allwinner,sun8i-r40-ths
- const: allwinner,sun50i-a64-ths
+ - const: allwinner,sun50i-a100-ths
- const: allwinner,sun50i-h5-ths
- const: allwinner,sun50i-h6-ths
diff --git a/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml b/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml
index b1a55ae497de..f386f2a7c06c 100644
--- a/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml
+++ b/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml
@@ -20,6 +20,7 @@ properties:
enum:
- renesas,r8a774a1-thermal # RZ/G2M
- renesas,r8a774b1-thermal # RZ/G2N
+ - renesas,r8a774e1-thermal # RZ/G2H
- renesas,r8a7795-thermal # R-Car H3
- renesas,r8a7796-thermal # R-Car M3-W
- renesas,r8a77961-thermal # R-Car M3-W+
diff --git a/Documentation/doc-guide/kernel-doc.rst b/Documentation/doc-guide/kernel-doc.rst
index 4fd86c21397b..52a87ab4c99f 100644
--- a/Documentation/doc-guide/kernel-doc.rst
+++ b/Documentation/doc-guide/kernel-doc.rst
@@ -490,6 +490,14 @@ identifiers: *[ function/type ...]*
.. kernel-doc:: lib/idr.c
:identifiers:
+no-identifiers: *[ function/type ...]*
+ Exclude documentation for each *function* and *type* in *source*.
+
+ Example::
+
+ .. kernel-doc:: lib/bitmap.c
+ :no-identifiers: bitmap_parselist
+
functions: *[ function/type ...]*
This is an alias of the 'identifiers' directive and deprecated.
diff --git a/Documentation/driver-api/basics.rst b/Documentation/driver-api/basics.rst
index 1ba88c7b3984..3e2dae954898 100644
--- a/Documentation/driver-api/basics.rst
+++ b/Documentation/driver-api/basics.rst
@@ -12,6 +12,8 @@ Driver device table
.. kernel-doc:: include/linux/mod_devicetable.h
:internal:
+ :no-identifiers: pci_device_id
+
Delaying, scheduling, and timer routines
----------------------------------------
@@ -55,15 +57,6 @@ High-resolution timers
.. kernel-doc:: kernel/time/hrtimer.c
:export:
-Workqueues and Kevents
-----------------------
-
-.. kernel-doc:: include/linux/workqueue.h
- :internal:
-
-.. kernel-doc:: kernel/workqueue.c
- :export:
-
Internal Functions
------------------
@@ -105,19 +98,15 @@ Kernel utility functions
.. kernel-doc:: include/linux/kernel.h
:internal:
+ :no-identifiers: kstrtol kstrtoul
.. kernel-doc:: kernel/printk/printk.c
:export:
+ :no-identifiers: printk
.. kernel-doc:: kernel/panic.c
:export:
-.. kernel-doc:: kernel/rcu/tree.c
- :export:
-
-.. kernel-doc:: kernel/rcu/update.c
- :export:
-
.. kernel-doc:: include/linux/overflow.h
:internal:
diff --git a/Documentation/driver-api/device_link.rst b/Documentation/driver-api/device_link.rst
index bc2d89af88ce..ee913ae16371 100644
--- a/Documentation/driver-api/device_link.rst
+++ b/Documentation/driver-api/device_link.rst
@@ -1,7 +1,3 @@
-.. |struct dev_pm_domain| replace:: :c:type:`struct dev_pm_domain <dev_pm_domain>`
-.. |struct generic_pm_domain| replace:: :c:type:`struct generic_pm_domain <generic_pm_domain>`
-
-
.. _device_link:
============
@@ -166,7 +162,7 @@ Examples
is the same as if the MMU was the parent of the master device.
The fact that both devices share the same power domain would normally
- suggest usage of a |struct dev_pm_domain| or |struct generic_pm_domain|,
+ suggest usage of a struct dev_pm_domain or struct generic_pm_domain,
however these are not independent devices that happen to share a power
switch, but rather the MMU device serves the busmaster device and is
useless without it. A device link creates a synthetic hierarchical
@@ -202,7 +198,7 @@ Examples
Alternatives
============
-* A |struct dev_pm_domain| can be used to override the bus,
+* A struct dev_pm_domain can be used to override the bus,
class or device type callbacks. It is intended for devices sharing
a single on/off switch, however it does not guarantee a specific
suspend/resume ordering, this needs to be implemented separately.
@@ -211,7 +207,7 @@ Alternatives
suspended. Furthermore it cannot be used to enforce a specific shutdown
ordering or a driver presence dependency.
-* A |struct generic_pm_domain| is a lot more heavyweight than a
+* A struct generic_pm_domain is a lot more heavyweight than a
device link and does not allow for shutdown ordering or driver presence
dependencies. It also cannot be used on ACPI systems.
@@ -321,5 +317,4 @@ State machine
API
===
-.. kernel-doc:: drivers/base/core.c
- :functions: device_link_add device_link_del device_link_remove
+See device_link_add(), device_link_del() and device_link_remove().
diff --git a/Documentation/driver-api/fpga/fpga-bridge.rst b/Documentation/driver-api/fpga/fpga-bridge.rst
index ccd677ba7d76..198aadafd3e7 100644
--- a/Documentation/driver-api/fpga/fpga-bridge.rst
+++ b/Documentation/driver-api/fpga/fpga-bridge.rst
@@ -4,8 +4,8 @@ FPGA Bridge
API to implement a new FPGA bridge
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-* struct :c:type:`fpga_bridge` — The FPGA Bridge structure
-* struct :c:type:`fpga_bridge_ops` — Low level Bridge driver ops
+* struct fpga_bridge — The FPGA Bridge structure
+* struct fpga_bridge_ops — Low level Bridge driver ops
* devm_fpga_bridge_create() — Allocate and init a bridge struct
* fpga_bridge_register() — Register a bridge
* fpga_bridge_unregister() — Unregister a bridge
diff --git a/Documentation/driver-api/fpga/fpga-mgr.rst b/Documentation/driver-api/fpga/fpga-mgr.rst
index af5382af1379..917ee22db429 100644
--- a/Documentation/driver-api/fpga/fpga-mgr.rst
+++ b/Documentation/driver-api/fpga/fpga-mgr.rst
@@ -101,9 +101,9 @@ in state.
API for implementing a new FPGA Manager driver
----------------------------------------------
-* ``fpga_mgr_states`` — Values for :c:member:`fpga_manager->state`.
-* struct :c:type:`fpga_manager` — the FPGA manager struct
-* struct :c:type:`fpga_manager_ops` — Low level FPGA manager driver ops
+* ``fpga_mgr_states`` — Values for :c:expr:`fpga_manager->state`.
+* struct fpga_manager — the FPGA manager struct
+* struct fpga_manager_ops — Low level FPGA manager driver ops
* devm_fpga_mgr_create() — Allocate and init a manager struct
* fpga_mgr_register() — Register an FPGA manager
* fpga_mgr_unregister() — Unregister an FPGA manager
diff --git a/Documentation/driver-api/fpga/fpga-programming.rst b/Documentation/driver-api/fpga/fpga-programming.rst
index f487ad64dfb9..002392dab04f 100644
--- a/Documentation/driver-api/fpga/fpga-programming.rst
+++ b/Documentation/driver-api/fpga/fpga-programming.rst
@@ -15,7 +15,7 @@ the FPGA manager and bridges. It will:
* lock the mutex of the region's FPGA manager
* build a list of FPGA bridges if a method has been specified to do so
* disable the bridges
- * program the FPGA using info passed in :c:member:`fpga_region->info`.
+ * program the FPGA using info passed in :c:expr:`fpga_region->info`.
* re-enable the bridges
* release the locks
diff --git a/Documentation/driver-api/fpga/fpga-region.rst b/Documentation/driver-api/fpga/fpga-region.rst
index 31118a8ba218..363a8171ab0a 100644
--- a/Documentation/driver-api/fpga/fpga-region.rst
+++ b/Documentation/driver-api/fpga/fpga-region.rst
@@ -45,7 +45,7 @@ An example of usage can be seen in the probe function of [#f2]_.
API to add a new FPGA region
----------------------------
-* struct :c:type:`fpga_region` — The FPGA region struct
+* struct fpga_region — The FPGA region struct
* devm_fpga_region_create() — Allocate and init a region struct
* fpga_region_register() — Register an FPGA region
* fpga_region_unregister() — Unregister an FPGA region
@@ -61,9 +61,9 @@ during the region's probe function.
The FPGA region will need to specify which bridges to control while programming
the FPGA. The region driver can build a list of bridges during probe time
-(:c:member:`fpga_region->bridge_list`) or it can have a function that creates
+(:c:expr:`fpga_region->bridge_list`) or it can have a function that creates
the list of bridges to program just before programming
-(:c:member:`fpga_region->get_bridges`). The FPGA bridge framework supplies the
+(:c:expr:`fpga_region->get_bridges`). The FPGA bridge framework supplies the
following APIs to handle building or tearing down that list.
* fpga_bridge_get_to_list() — Get a ref of an FPGA bridge, add it to a
diff --git a/Documentation/driver-api/iio/buffers.rst b/Documentation/driver-api/iio/buffers.rst
index dd64c9c5fb1e..3ddebddc02ca 100644
--- a/Documentation/driver-api/iio/buffers.rst
+++ b/Documentation/driver-api/iio/buffers.rst
@@ -2,7 +2,7 @@
Buffers
=======
-* struct :c:type:`iio_buffer` — general buffer structure
+* struct iio_buffer — general buffer structure
* :c:func:`iio_validate_scan_mask_onehot` — Validates that exactly one channel
is selected
* :c:func:`iio_buffer_get` — Grab a reference to the buffer
diff --git a/Documentation/driver-api/iio/core.rst b/Documentation/driver-api/iio/core.rst
index 51b21e002396..715cf29482a1 100644
--- a/Documentation/driver-api/iio/core.rst
+++ b/Documentation/driver-api/iio/core.rst
@@ -10,7 +10,7 @@ applications manipulating sensors. The implementation can be found under
Industrial I/O Devices
----------------------
-* struct :c:type:`iio_dev` - industrial I/O device
+* struct iio_dev - industrial I/O device
* iio_device_alloc() - allocate an :c:type:`iio_dev` from a driver
* iio_device_free() - free an :c:type:`iio_dev` from a driver
* iio_device_register() - register a device with the IIO subsystem
@@ -66,7 +66,7 @@ Common attributes are:
IIO device channels
===================
-struct :c:type:`iio_chan_spec` - specification of a single channel
+struct iio_chan_spec - specification of a single channel
An IIO device channel is a representation of a data channel. An IIO device can
have one or multiple channels. For example:
@@ -77,7 +77,7 @@ have one or multiple channels. For example:
* an accelerometer can have up to 3 channels representing acceleration on X, Y
and Z axes.
-An IIO channel is described by the struct :c:type:`iio_chan_spec`.
+An IIO channel is described by the struct iio_chan_spec.
A thermometer driver for the temperature sensor in the example above would
have to describe its channel as follows::
diff --git a/Documentation/driver-api/iio/hw-consumer.rst b/Documentation/driver-api/iio/hw-consumer.rst
index 819fb9edc005..76133a3796f2 100644
--- a/Documentation/driver-api/iio/hw-consumer.rst
+++ b/Documentation/driver-api/iio/hw-consumer.rst
@@ -8,7 +8,7 @@ software buffer for data. The implementation can be found under
:file:`drivers/iio/buffer/hw-consumer.c`
-* struct :c:type:`iio_hw_consumer` — Hardware consumer structure
+* struct iio_hw_consumer — Hardware consumer structure
* :c:func:`iio_hw_consumer_alloc` — Allocate IIO hardware consumer
* :c:func:`iio_hw_consumer_free` — Free IIO hardware consumer
* :c:func:`iio_hw_consumer_enable` — Enable IIO hardware consumer
diff --git a/Documentation/driver-api/iio/triggered-buffers.rst b/Documentation/driver-api/iio/triggered-buffers.rst
index 0db12660cc90..417555dbbdf4 100644
--- a/Documentation/driver-api/iio/triggered-buffers.rst
+++ b/Documentation/driver-api/iio/triggered-buffers.rst
@@ -10,7 +10,7 @@ IIO triggered buffer setup
* :c:func:`iio_triggered_buffer_setup` — Setup triggered buffer and pollfunc
* :c:func:`iio_triggered_buffer_cleanup` — Free resources allocated by
:c:func:`iio_triggered_buffer_setup`
-* struct :c:type:`iio_buffer_setup_ops` — buffer setup related callbacks
+* struct iio_buffer_setup_ops — buffer setup related callbacks
A typical triggered buffer setup looks like this::
diff --git a/Documentation/driver-api/iio/triggers.rst b/Documentation/driver-api/iio/triggers.rst
index dfd7ba3eabde..288625e40672 100644
--- a/Documentation/driver-api/iio/triggers.rst
+++ b/Documentation/driver-api/iio/triggers.rst
@@ -2,7 +2,7 @@
Triggers
========
-* struct :c:type:`iio_trigger` — industrial I/O trigger device
+* struct iio_trigger — industrial I/O trigger device
* :c:func:`devm_iio_trigger_alloc` — Resource-managed iio_trigger_alloc
* :c:func:`devm_iio_trigger_register` — Resource-managed iio_trigger_register
iio_trigger_unregister
@@ -63,7 +63,7 @@ Let's see a simple example of how to setup a trigger to be used by a driver::
IIO trigger ops
===============
-* struct :c:type:`iio_trigger_ops` — operations structure for an iio_trigger.
+* struct iio_trigger_ops — operations structure for an iio_trigger.
Notice that a trigger has a set of operations attached:
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index 6e7c702e0268..987d6e74ea6a 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -27,7 +27,6 @@ available subsections can be seen below.
component
message-based
infiniband
- sound
frame-buffer
regulator
iio/index
diff --git a/Documentation/driver-api/infrastructure.rst b/Documentation/driver-api/infrastructure.rst
index 06d98c4526df..683bd460e222 100644
--- a/Documentation/driver-api/infrastructure.rst
+++ b/Documentation/driver-api/infrastructure.rst
@@ -6,6 +6,7 @@ The Basic Device Driver-Model Structures
.. kernel-doc:: include/linux/device.h
:internal:
+ :no-identifiers: device_link_state
Device Drivers Base
-------------------
@@ -28,9 +29,6 @@ Device Drivers Base
.. kernel-doc:: drivers/base/node.c
:internal:
-.. kernel-doc:: drivers/base/firmware_loader/main.c
- :export:
-
.. kernel-doc:: drivers/base/transport_class.c
:export:
diff --git a/Documentation/driver-api/libata.rst b/Documentation/driver-api/libata.rst
index e2f87b82b074..d477e296bda5 100644
--- a/Documentation/driver-api/libata.rst
+++ b/Documentation/driver-api/libata.rst
@@ -508,7 +508,7 @@ also complete commands.
2. ATA_QCFLAG_ACTIVE is cleared from qc->flags.
-3. :c:func:`qc->complete_fn` callback is invoked. If the return value of the
+3. :c:expr:`qc->complete_fn` callback is invoked. If the return value of the
callback is not zero. Completion is short circuited and
:c:func:`ata_qc_complete` returns.
diff --git a/Documentation/driver-api/media/cec-core.rst b/Documentation/driver-api/media/cec-core.rst
index 03016eeaf8f4..bc42982ac21e 100644
--- a/Documentation/driver-api/media/cec-core.rst
+++ b/Documentation/driver-api/media/cec-core.rst
@@ -98,7 +98,7 @@ Implementing the Low-Level CEC Adapter
The following low-level adapter operations have to be implemented in
your driver:
-.. c:type:: struct cec_adap_ops
+.. c:struct:: cec_adap_ops
.. code-block:: none
diff --git a/Documentation/driver-api/media/dtv-frontend.rst b/Documentation/driver-api/media/dtv-frontend.rst
index b362109bb131..91f77fe58e83 100644
--- a/Documentation/driver-api/media/dtv-frontend.rst
+++ b/Documentation/driver-api/media/dtv-frontend.rst
@@ -125,7 +125,7 @@ responsible for tuning the device. It supports multiple algorithms to
detect a channel, as defined at enum :c:func:`dvbfe_algo`.
The algorithm to be used is obtained via ``.get_frontend_algo``. If the driver
-doesn't fill its field at struct :c:type:`dvb_frontend_ops`, it will default to
+doesn't fill its field at struct dvb_frontend_ops, it will default to
``DVBFE_ALGO_SW``, meaning that the dvb-core will do a zigzag when tuning,
e. g. it will try first to use the specified center frequency ``f``,
then, it will do ``f`` + |delta|, ``f`` - |delta|, ``f`` + 2 x |delta|,
@@ -140,7 +140,7 @@ define a ``.get_frontend_algo`` function that would return ``DVBFE_ALGO_HW``.
a third type (``DVBFE_ALGO_CUSTOM``), in order to allow the driver to
define its own hardware-assisted algorithm. Very few hardware need to
use it nowadays. Using ``DVBFE_ALGO_CUSTOM`` require to provide other
- function callbacks at struct :c:type:`dvb_frontend_ops`.
+ function callbacks at struct dvb_frontend_ops.
Attaching frontend driver to the bridge driver
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/Documentation/driver-api/media/mc-core.rst b/Documentation/driver-api/media/mc-core.rst
index 05bba0b61748..57b5bbba944e 100644
--- a/Documentation/driver-api/media/mc-core.rst
+++ b/Documentation/driver-api/media/mc-core.rst
@@ -36,7 +36,7 @@ pad to a sink pad.
Media device
^^^^^^^^^^^^
-A media device is represented by a struct :c:type:`media_device`
+A media device is represented by a struct media_device
instance, defined in ``include/media/media-device.h``.
Allocation of the structure is handled by the media device driver, usually by
embedding the :c:type:`media_device` instance in a larger driver-specific
@@ -49,7 +49,7 @@ and unregistered by calling :c:func:`media_device_unregister()`.
Entities
^^^^^^^^
-Entities are represented by a struct :c:type:`media_entity`
+Entities are represented by a struct media_entity
instance, defined in ``include/media/media-entity.h``. The structure is usually
embedded into a higher-level structure, such as
:c:type:`v4l2_subdev` or :c:type:`video_device`
@@ -67,10 +67,10 @@ Interfaces
^^^^^^^^^^
Interfaces are represented by a
-struct :c:type:`media_interface` instance, defined in
+struct media_interface instance, defined in
``include/media/media-entity.h``. Currently, only one type of interface is
defined: a device node. Such interfaces are represented by a
-struct :c:type:`media_intf_devnode`.
+struct media_intf_devnode.
Drivers initialize and create device node interfaces by calling
:c:func:`media_devnode_create()`
@@ -79,7 +79,7 @@ and remove them by calling:
Pads
^^^^
-Pads are represented by a struct :c:type:`media_pad` instance,
+Pads are represented by a struct media_pad instance,
defined in ``include/media/media-entity.h``. Each entity stores its pads in
a pads array managed by the entity driver. Drivers usually embed the array in
a driver-specific structure.
@@ -87,8 +87,8 @@ a driver-specific structure.
Pads are identified by their entity and their 0-based index in the pads
array.
-Both information are stored in the struct :c:type:`media_pad`,
-making the struct :c:type:`media_pad` pointer the canonical way
+Both information are stored in the struct media_pad,
+making the struct media_pad pointer the canonical way
to store and pass link references.
Pads have flags that describe the pad capabilities and state.
@@ -104,7 +104,7 @@ Pads have flags that describe the pad capabilities and state.
Links
^^^^^
-Links are represented by a struct :c:type:`media_link` instance,
+Links are represented by a struct media_link instance,
defined in ``include/media/media-entity.h``. There are two types of links:
**1. pad to pad links**:
@@ -187,7 +187,7 @@ Use count and power handling
Due to the wide differences between drivers regarding power management
needs, the media controller does not implement power management. However,
-the struct :c:type:`media_entity` includes a ``use_count``
+the struct media_entity includes a ``use_count``
field that media drivers
can use to track the number of users of every entity for power management
needs.
@@ -213,11 +213,11 @@ prevent link states from being modified during streaming by calling
The function will mark all entities connected to the given entity through
enabled links, either directly or indirectly, as streaming.
-The struct :c:type:`media_pipeline` instance pointed to by
+The struct media_pipeline instance pointed to by
the pipe argument will be stored in every entity in the pipeline.
-Drivers should embed the struct :c:type:`media_pipeline`
+Drivers should embed the struct media_pipeline
in higher-level pipeline structures and can then access the
-pipeline through the struct :c:type:`media_entity`
+pipeline through the struct media_entity
pipe field.
Calls to :c:func:`media_pipeline_start()` can be nested.
diff --git a/Documentation/driver-api/media/v4l2-controls.rst b/Documentation/driver-api/media/v4l2-controls.rst
index 5129019afb49..77f42ea3bac7 100644
--- a/Documentation/driver-api/media/v4l2-controls.rst
+++ b/Documentation/driver-api/media/v4l2-controls.rst
@@ -27,7 +27,7 @@ V4L2 specification with respect to controls in a central place. And to make
life as easy as possible for the driver developer.
Note that the control framework relies on the presence of a struct
-:c:type:`v4l2_device` for V4L2 drivers and struct :c:type:`v4l2_subdev` for
+:c:type:`v4l2_device` for V4L2 drivers and struct v4l2_subdev for
sub-device drivers.
diff --git a/Documentation/driver-api/media/v4l2-dev.rst b/Documentation/driver-api/media/v4l2-dev.rst
index 63c064837c00..666330af31ed 100644
--- a/Documentation/driver-api/media/v4l2-dev.rst
+++ b/Documentation/driver-api/media/v4l2-dev.rst
@@ -67,7 +67,7 @@ You should also set these fields of :c:type:`video_device`:
file operation is called this lock will be taken by the core and released
afterwards. See the next section for more details.
-- :c:type:`video_device`->queue: a pointer to the struct :c:type:`vb2_queue`
+- :c:type:`video_device`->queue: a pointer to the struct vb2_queue
associated with this device node.
If queue is not ``NULL``, and queue->lock is not ``NULL``, then queue->lock
is used for the queuing ioctls (``VIDIOC_REQBUFS``, ``CREATE_BUFS``,
@@ -81,7 +81,7 @@ You should also set these fields of :c:type:`video_device`:
- :c:type:`video_device`->prio: keeps track of the priorities. Used to
implement ``VIDIOC_G_PRIORITY`` and ``VIDIOC_S_PRIORITY``.
- If left to ``NULL``, then it will use the struct :c:type:`v4l2_prio_state`
+ If left to ``NULL``, then it will use the struct v4l2_prio_state
in :c:type:`v4l2_device`. If you want to have a separate priority state per
(group of) device node(s), then you can point it to your own struct
:c:type:`v4l2_prio_state`.
@@ -95,7 +95,7 @@ You should also set these fields of :c:type:`video_device`:
but it is used by both a raw video PCI device (cx8800) and a MPEG PCI device
(cx8802). Since the :c:type:`v4l2_device` cannot be associated with two PCI
devices at the same time it is setup without a parent device. But when the
- struct :c:type:`video_device` is initialized you **do** know which parent
+ struct video_device is initialized you **do** know which parent
PCI device to use and so you set ``dev_device`` to the correct PCI device.
If you use :c:type:`v4l2_ioctl_ops`, then you should set
@@ -138,7 +138,7 @@ ioctls and locking
------------------
The V4L core provides optional locking services. The main service is the
-lock field in struct :c:type:`video_device`, which is a pointer to a mutex.
+lock field in struct video_device, which is a pointer to a mutex.
If you set this pointer, then that will be used by unlocked_ioctl to
serialize all ioctls.
diff --git a/Documentation/driver-api/media/v4l2-device.rst b/Documentation/driver-api/media/v4l2-device.rst
index 5e25bf182c18..7bd9c45f551b 100644
--- a/Documentation/driver-api/media/v4l2-device.rst
+++ b/Documentation/driver-api/media/v4l2-device.rst
@@ -3,7 +3,7 @@
V4L2 device instance
--------------------
-Each device instance is represented by a struct :c:type:`v4l2_device`.
+Each device instance is represented by a struct v4l2_device.
Very simple devices can just allocate this struct, but most of the time you
would embed this struct inside a larger struct.
@@ -18,9 +18,9 @@ dev->driver_data field is ``NULL``, it will be linked to
Drivers that want integration with the media device framework need to set
dev->driver_data manually to point to the driver-specific device structure
-that embed the struct :c:type:`v4l2_device` instance. This is achieved by a
+that embed the struct v4l2_device instance. This is achieved by a
``dev_set_drvdata()`` call before registering the V4L2 device instance.
-They must also set the struct :c:type:`v4l2_device` mdev field to point to a
+They must also set the struct v4l2_device mdev field to point to a
properly initialized and registered :c:type:`media_device` instance.
If :c:type:`v4l2_dev <v4l2_device>`\ ->name is empty then it will be set to a
diff --git a/Documentation/driver-api/media/v4l2-event.rst b/Documentation/driver-api/media/v4l2-event.rst
index a4b7ae2b94d8..5b8254eba7da 100644
--- a/Documentation/driver-api/media/v4l2-event.rst
+++ b/Documentation/driver-api/media/v4l2-event.rst
@@ -44,18 +44,18 @@ such objects.
So to summarize:
-- struct :c:type:`v4l2_fh` has two lists: one of the ``subscribed`` events,
+- struct v4l2_fh has two lists: one of the ``subscribed`` events,
and one of the ``available`` events.
-- struct :c:type:`v4l2_subscribed_event` has a ringbuffer of raised
+- struct v4l2_subscribed_event has a ringbuffer of raised
(pending) events of that particular type.
-- If struct :c:type:`v4l2_subscribed_event` is associated with a specific
+- If struct v4l2_subscribed_event is associated with a specific
object, then that object will have an internal list of
- struct :c:type:`v4l2_subscribed_event` so it knows who subscribed an
+ struct v4l2_subscribed_event so it knows who subscribed an
event to that object.
-Furthermore, the internal struct :c:type:`v4l2_subscribed_event` has
+Furthermore, the internal struct v4l2_subscribed_event has
``merge()`` and ``replace()`` callbacks which drivers can set. These
callbacks are called when a new event is raised and there is no more room.
diff --git a/Documentation/driver-api/media/v4l2-fh.rst b/Documentation/driver-api/media/v4l2-fh.rst
index 4c62b19af744..3eeaa8da0c9e 100644
--- a/Documentation/driver-api/media/v4l2-fh.rst
+++ b/Documentation/driver-api/media/v4l2-fh.rst
@@ -3,11 +3,11 @@
V4L2 File handlers
------------------
-struct :c:type:`v4l2_fh` provides a way to easily keep file handle specific
+struct v4l2_fh provides a way to easily keep file handle specific
data that is used by the V4L2 framework.
.. attention::
- New drivers must use struct :c:type:`v4l2_fh`
+ New drivers must use struct v4l2_fh
since it is also used to implement priority handling
(:ref:`VIDIOC_G_PRIORITY`).
@@ -16,11 +16,11 @@ whether a driver uses :c:type:`v4l2_fh` as its ``file->private_data`` pointer
by testing the ``V4L2_FL_USES_V4L2_FH`` bit in :c:type:`video_device`->flags.
This bit is set whenever :c:func:`v4l2_fh_init` is called.
-struct :c:type:`v4l2_fh` is allocated as a part of the driver's own file handle
+struct v4l2_fh is allocated as a part of the driver's own file handle
structure and ``file->private_data`` is set to it in the driver's ``open()``
function by the driver.
-In many cases the struct :c:type:`v4l2_fh` will be embedded in a larger
+In many cases the struct v4l2_fh will be embedded in a larger
structure. In that case you should call:
#) :c:func:`v4l2_fh_init` and :c:func:`v4l2_fh_add` in ``open()``
@@ -102,18 +102,18 @@ Below is a short description of the :c:type:`v4l2_fh` functions used:
memory can be freed.
-If struct :c:type:`v4l2_fh` is not embedded, then you can use these helper functions:
+If struct v4l2_fh is not embedded, then you can use these helper functions:
:c:func:`v4l2_fh_open <v4l2_fh_open>`
(struct file \*filp)
-- This allocates a struct :c:type:`v4l2_fh`, initializes it and adds it to
- the struct :c:type:`video_device` associated with the file struct.
+- This allocates a struct v4l2_fh, initializes it and adds it to
+ the struct video_device associated with the file struct.
:c:func:`v4l2_fh_release <v4l2_fh_release>`
(struct file \*filp)
-- This deletes it from the struct :c:type:`video_device` associated with the
+- This deletes it from the struct video_device associated with the
file struct, uninitialised the :c:type:`v4l2_fh` and frees it.
These two functions can be plugged into the v4l2_file_operation's ``open()``
diff --git a/Documentation/driver-api/media/v4l2-subdev.rst b/Documentation/driver-api/media/v4l2-subdev.rst
index 6248ea99e979..bb5b1a7cdfd9 100644
--- a/Documentation/driver-api/media/v4l2-subdev.rst
+++ b/Documentation/driver-api/media/v4l2-subdev.rst
@@ -110,7 +110,7 @@ pads:
err = media_entity_pads_init(&sd->entity, npads, pads);
The pads array must have been previously initialized. There is no need to
-manually set the struct :c:type:`media_entity` function and name fields, but the
+manually set the struct media_entity function and name fields, but the
revision field must be initialized if needed.
A reference to the entity will be automatically acquired/released when the
diff --git a/Documentation/driver-api/mei/mei.rst b/Documentation/driver-api/mei/mei.rst
index cea0b69ec216..4f2ced4ccdc6 100644
--- a/Documentation/driver-api/mei/mei.rst
+++ b/Documentation/driver-api/mei/mei.rst
@@ -38,7 +38,7 @@ Because some of the Intel ME features can change the system
configuration, the driver by default allows only a privileged
user to access it.
-The session is terminated calling :c:func:`close(int fd)`.
+The session is terminated calling :c:expr:`close(fd)`.
A code snippet for an application communicating with Intel AMTHI client:
diff --git a/Documentation/driver-api/pm/cpuidle.rst b/Documentation/driver-api/pm/cpuidle.rst
index 3588bf078566..d477208604b8 100644
--- a/Documentation/driver-api/pm/cpuidle.rst
+++ b/Documentation/driver-api/pm/cpuidle.rst
@@ -1,11 +1,6 @@
.. SPDX-License-Identifier: GPL-2.0
.. include:: <isonum.txt>
-.. |struct cpuidle_governor| replace:: :c:type:`struct cpuidle_governor <cpuidle_governor>`
-.. |struct cpuidle_device| replace:: :c:type:`struct cpuidle_device <cpuidle_device>`
-.. |struct cpuidle_driver| replace:: :c:type:`struct cpuidle_driver <cpuidle_driver>`
-.. |struct cpuidle_state| replace:: :c:type:`struct cpuidle_state <cpuidle_state>`
-
========================
CPU Idle Time Management
========================
@@ -54,7 +49,7 @@ platform that the Linux kernel can run on. For this reason, data structures
operated on by them cannot depend on any hardware architecture or platform
design details as well.
-The governor itself is represented by a |struct cpuidle_governor| object
+The governor itself is represented by a struct cpuidle_governor object
containing four callback pointers, :c:member:`enable`, :c:member:`disable`,
:c:member:`select`, :c:member:`reflect`, a :c:member:`rating` field described
below, and a name (string) used for identifying it.
@@ -83,11 +78,11 @@ callbacks:
int (*enable) (struct cpuidle_driver *drv, struct cpuidle_device *dev);
The role of this callback is to prepare the governor for handling the
- (logical) CPU represented by the |struct cpuidle_device| object pointed
- to by the ``dev`` argument. The |struct cpuidle_driver| object pointed
+ (logical) CPU represented by the struct cpuidle_device object pointed
+ to by the ``dev`` argument. The struct cpuidle_driver object pointed
to by the ``drv`` argument represents the ``CPUIdle`` driver to be used
with that CPU (among other things, it should contain the list of
- |struct cpuidle_state| objects representing idle states that the
+ struct cpuidle_state objects representing idle states that the
processor holding the given CPU can be asked to enter).
It may fail, in which case it is expected to return a negative error
@@ -102,7 +97,7 @@ callbacks:
void (*disable) (struct cpuidle_driver *drv, struct cpuidle_device *dev);
Called to make the governor stop handling the (logical) CPU represented
- by the |struct cpuidle_device| object pointed to by the ``dev``
+ by the struct cpuidle_device object pointed to by the ``dev``
argument.
It is expected to reverse any changes made by the ``->enable()``
@@ -116,12 +111,12 @@ callbacks:
bool *stop_tick);
Called to select an idle state for the processor holding the (logical)
- CPU represented by the |struct cpuidle_device| object pointed to by the
+ CPU represented by the struct cpuidle_device object pointed to by the
``dev`` argument.
The list of idle states to take into consideration is represented by the
- :c:member:`states` array of |struct cpuidle_state| objects held by the
- |struct cpuidle_driver| object pointed to by the ``drv`` argument (which
+ :c:member:`states` array of struct cpuidle_state objects held by the
+ struct cpuidle_driver object pointed to by the ``drv`` argument (which
represents the ``CPUIdle`` driver to be used with the CPU at hand). The
value returned by this callback is interpreted as an index into that
array (unless it is a negative error code).
@@ -136,7 +131,7 @@ callbacks:
asking the processor to enter the idle state).
This callback is mandatory (i.e. the :c:member:`select` callback pointer
- in |struct cpuidle_governor| must not be ``NULL`` for the registration
+ in struct cpuidle_governor must not be ``NULL`` for the registration
of the governor to succeed).
:c:member:`reflect`
@@ -167,21 +162,21 @@ CPU idle time management (``CPUIdle``) drivers provide an interface between the
other parts of ``CPUIdle`` and the hardware.
First of all, a ``CPUIdle`` driver has to populate the :c:member:`states` array
-of |struct cpuidle_state| objects included in the |struct cpuidle_driver| object
+of struct cpuidle_state objects included in the struct cpuidle_driver object
representing it. Going forward this array will represent the list of available
idle states that the processor hardware can be asked to enter shared by all of
the logical CPUs handled by the given driver.
The entries in the :c:member:`states` array are expected to be sorted by the
-value of the :c:member:`target_residency` field in |struct cpuidle_state| in
+value of the :c:member:`target_residency` field in struct cpuidle_state in
the ascending order (that is, index 0 should correspond to the idle state with
the minimum value of :c:member:`target_residency`). [Since the
:c:member:`target_residency` value is expected to reflect the "depth" of the
-idle state represented by the |struct cpuidle_state| object holding it, this
+idle state represented by the struct cpuidle_state object holding it, this
sorting order should be the same as the ascending sorting order by the idle
state "depth".]
-Three fields in |struct cpuidle_state| are used by the existing ``CPUIdle``
+Three fields in struct cpuidle_state are used by the existing ``CPUIdle``
governors for computations related to idle state selection:
:c:member:`target_residency`
@@ -203,7 +198,7 @@ governors for computations related to idle state selection:
any idle state at all. [There are other flags used by the ``CPUIdle``
core in special situations.]
-The :c:member:`enter` callback pointer in |struct cpuidle_state|, which must not
+The :c:member:`enter` callback pointer in struct cpuidle_state, which must not
be ``NULL``, points to the routine to execute in order to ask the processor to
enter this particular idle state:
@@ -212,14 +207,14 @@ enter this particular idle state:
void (*enter) (struct cpuidle_device *dev, struct cpuidle_driver *drv,
int index);
-The first two arguments of it point to the |struct cpuidle_device| object
+The first two arguments of it point to the struct cpuidle_device object
representing the logical CPU running this callback and the
-|struct cpuidle_driver| object representing the driver itself, respectively,
-and the last one is an index of the |struct cpuidle_state| entry in the driver's
+struct cpuidle_driver object representing the driver itself, respectively,
+and the last one is an index of the struct cpuidle_state entry in the driver's
:c:member:`states` array representing the idle state to ask the processor to
enter.
-The analogous ``->enter_s2idle()`` callback in |struct cpuidle_state| is used
+The analogous ``->enter_s2idle()`` callback in struct cpuidle_state is used
only for implementing the suspend-to-idle system-wide power management feature.
The difference between in and ``->enter()`` is that it must not re-enable
interrupts at any point (even temporarily) or attempt to change the states of
@@ -227,48 +222,48 @@ clock event devices, which the ``->enter()`` callback may do sometimes.
Once the :c:member:`states` array has been populated, the number of valid
entries in it has to be stored in the :c:member:`state_count` field of the
-|struct cpuidle_driver| object representing the driver. Moreover, if any
+struct cpuidle_driver object representing the driver. Moreover, if any
entries in the :c:member:`states` array represent "coupled" idle states (that
is, idle states that can only be asked for if multiple related logical CPUs are
-idle), the :c:member:`safe_state_index` field in |struct cpuidle_driver| needs
+idle), the :c:member:`safe_state_index` field in struct cpuidle_driver needs
to be the index of an idle state that is not "coupled" (that is, one that can be
asked for if only one logical CPU is idle).
In addition to that, if the given ``CPUIdle`` driver is only going to handle a
subset of logical CPUs in the system, the :c:member:`cpumask` field in its
-|struct cpuidle_driver| object must point to the set (mask) of CPUs that will be
+struct cpuidle_driver object must point to the set (mask) of CPUs that will be
handled by it.
A ``CPUIdle`` driver can only be used after it has been registered. If there
are no "coupled" idle state entries in the driver's :c:member:`states` array,
-that can be accomplished by passing the driver's |struct cpuidle_driver| object
+that can be accomplished by passing the driver's struct cpuidle_driver object
to :c:func:`cpuidle_register_driver()`. Otherwise, :c:func:`cpuidle_register()`
should be used for this purpose.
-However, it also is necessary to register |struct cpuidle_device| objects for
+However, it also is necessary to register struct cpuidle_device objects for
all of the logical CPUs to be handled by the given ``CPUIdle`` driver with the
help of :c:func:`cpuidle_register_device()` after the driver has been registered
and :c:func:`cpuidle_register_driver()`, unlike :c:func:`cpuidle_register()`,
does not do that automatically. For this reason, the drivers that use
:c:func:`cpuidle_register_driver()` to register themselves must also take care
-of registering the |struct cpuidle_device| objects as needed, so it is generally
+of registering the struct cpuidle_device objects as needed, so it is generally
recommended to use :c:func:`cpuidle_register()` for ``CPUIdle`` driver
registration in all cases.
-The registration of a |struct cpuidle_device| object causes the ``CPUIdle``
+The registration of a struct cpuidle_device object causes the ``CPUIdle``
``sysfs`` interface to be created and the governor's ``->enable()`` callback to
be invoked for the logical CPU represented by it, so it must take place after
registering the driver that will handle the CPU in question.
-``CPUIdle`` drivers and |struct cpuidle_device| objects can be unregistered
+``CPUIdle`` drivers and struct cpuidle_device objects can be unregistered
when they are not necessary any more which allows some resources associated with
them to be released. Due to dependencies between them, all of the
-|struct cpuidle_device| objects representing CPUs handled by the given
+struct cpuidle_device objects representing CPUs handled by the given
``CPUIdle`` driver must be unregistered, with the help of
:c:func:`cpuidle_unregister_device()`, before calling
:c:func:`cpuidle_unregister_driver()` to unregister the driver. Alternatively,
:c:func:`cpuidle_unregister()` can be called to unregister a ``CPUIdle`` driver
-along with all of the |struct cpuidle_device| objects representing CPUs handled
+along with all of the struct cpuidle_device objects representing CPUs handled
by it.
``CPUIdle`` drivers can respond to runtime system configuration changes that
@@ -277,8 +272,8 @@ happen, for example, when the system's power source is switched from AC to
battery or the other way around). Upon a notification of such a change,
a ``CPUIdle`` driver is expected to call :c:func:`cpuidle_pause_and_lock()` to
turn ``CPUIdle`` off temporarily and then :c:func:`cpuidle_disable_device()` for
-all of the |struct cpuidle_device| objects representing CPUs affected by that
+all of the struct cpuidle_device objects representing CPUs affected by that
change. Next, it can update its :c:member:`states` array in accordance with
the new configuration of the system, call :c:func:`cpuidle_enable_device()` for
-all of the relevant |struct cpuidle_device| objects and invoke
+all of the relevant struct cpuidle_device objects and invoke
:c:func:`cpuidle_resume_and_unlock()` to allow ``CPUIdle`` to be used again.
diff --git a/Documentation/driver-api/pm/devices.rst b/Documentation/driver-api/pm/devices.rst
index 946ad0b94e31..6b3bfd29fd84 100644
--- a/Documentation/driver-api/pm/devices.rst
+++ b/Documentation/driver-api/pm/devices.rst
@@ -1,14 +1,6 @@
.. SPDX-License-Identifier: GPL-2.0
.. include:: <isonum.txt>
-.. |struct dev_pm_ops| replace:: :c:type:`struct dev_pm_ops <dev_pm_ops>`
-.. |struct dev_pm_domain| replace:: :c:type:`struct dev_pm_domain <dev_pm_domain>`
-.. |struct bus_type| replace:: :c:type:`struct bus_type <bus_type>`
-.. |struct device_type| replace:: :c:type:`struct device_type <device_type>`
-.. |struct class| replace:: :c:type:`struct class <class>`
-.. |struct wakeup_source| replace:: :c:type:`struct wakeup_source <wakeup_source>`
-.. |struct device| replace:: :c:type:`struct device <device>`
-
.. _driverapi_pm_devices:
==============================
@@ -107,7 +99,7 @@ Device Power Management Operations
Device power management operations, at the subsystem level as well as at the
device driver level, are implemented by defining and populating objects of type
-|struct dev_pm_ops| defined in :file:`include/linux/pm.h`. The roles of the
+struct dev_pm_ops defined in :file:`include/linux/pm.h`. The roles of the
methods included in it will be explained in what follows. For now, it should be
sufficient to remember that the last three methods are specific to runtime power
management while the remaining ones are used during system-wide power
@@ -115,7 +107,7 @@ transitions.
There also is a deprecated "old" or "legacy" interface for power management
operations available at least for some subsystems. This approach does not use
-|struct dev_pm_ops| objects and it is suitable only for implementing system
+struct dev_pm_ops objects and it is suitable only for implementing system
sleep power management methods in a limited way. Therefore it is not described
in this document, so please refer directly to the source code for more
information about it.
@@ -125,9 +117,9 @@ Subsystem-Level Methods
-----------------------
The core methods to suspend and resume devices reside in
-|struct dev_pm_ops| pointed to by the :c:member:`ops` member of
-|struct dev_pm_domain|, or by the :c:member:`pm` member of |struct bus_type|,
-|struct device_type| and |struct class|. They are mostly of interest to the
+struct dev_pm_ops pointed to by the :c:member:`ops` member of
+struct dev_pm_domain, or by the :c:member:`pm` member of struct bus_type,
+struct device_type and struct class. They are mostly of interest to the
people writing infrastructure for platforms and buses, like PCI or USB, or
device type and device class drivers. They also are relevant to the writers of
device drivers whose subsystems (PM domains, device types, device classes and
@@ -156,7 +148,7 @@ The :c:member:`power.can_wakeup` flag just records whether the device (and its
driver) can physically support wakeup events. The
:c:func:`device_set_wakeup_capable()` routine affects this flag. The
:c:member:`power.wakeup` field is a pointer to an object of type
-|struct wakeup_source| used for controlling whether or not the device should use
+struct wakeup_source used for controlling whether or not the device should use
its system wakeup mechanism and for notifying the PM core of system wakeup
events signaled by the device. This object is only present for wakeup-capable
devices (i.e. devices whose :c:member:`can_wakeup` flags are set) and is created
@@ -418,7 +410,7 @@ On many platforms they will gate off one or more clock sources; sometimes they
will also switch off power supplies or reduce voltages. [Drivers supporting
runtime PM may already have performed some or all of these steps.]
-If :c:func:`device_may_wakeup(dev)` returns ``true``, the device should be
+If :c:func:`device_may_wakeup()` returns ``true``, the device should be
prepared for generating hardware wakeup signals to trigger a system wakeup event
when the system is in the sleep state. For example, :c:func:`enable_irq_wake()`
might identify GPIO signals hooked up to a switch or other external hardware,
@@ -713,8 +705,8 @@ nested inside another power domain. The nested domain is referred to as the
sub-domain of the parent domain.
Support for power domains is provided through the :c:member:`pm_domain` field of
-|struct device|. This field is a pointer to an object of type
-|struct dev_pm_domain|, defined in :file:`include/linux/pm.h`, providing a set
+struct device. This field is a pointer to an object of type
+struct dev_pm_domain, defined in :file:`include/linux/pm.h`, providing a set
of power management callbacks analogous to the subsystem-level and device driver
callbacks that are executed for the given device during all power transitions,
instead of the respective subsystem-level callbacks. Specifically, if a
diff --git a/Documentation/driver-api/regulator.rst b/Documentation/driver-api/regulator.rst
index 520da0a5251d..b43c78eb24d8 100644
--- a/Documentation/driver-api/regulator.rst
+++ b/Documentation/driver-api/regulator.rst
@@ -116,7 +116,7 @@ core, providing operations structures to the core. A notifier interface
allows error conditions to be reported to the core.
Registration should be triggered by explicit setup done by the platform,
-supplying a struct :c:type:`regulator_init_data` for the regulator
+supplying a struct regulator_init_data for the regulator
containing constraint and supply information.
Machine interface
@@ -144,7 +144,7 @@ a given system, for example supporting higher supply voltages than the
consumers are rated for.
This is done at driver registration time` by providing a
-struct :c:type:`regulation_constraints`.
+struct regulation_constraints.
The constraints may also specify an initial configuration for the
regulator in the constraints, which is particularly useful for use with
diff --git a/Documentation/driver-api/sound.rst b/Documentation/driver-api/sound.rst
deleted file mode 100644
index afef6eabc073..000000000000
--- a/Documentation/driver-api/sound.rst
+++ /dev/null
@@ -1,54 +0,0 @@
-Sound Devices
-=============
-
-.. kernel-doc:: include/sound/core.h
- :internal:
-
-.. kernel-doc:: sound/sound_core.c
- :export:
-
-.. kernel-doc:: include/sound/pcm.h
- :internal:
-
-.. kernel-doc:: sound/core/pcm.c
- :export:
-
-.. kernel-doc:: sound/core/device.c
- :export:
-
-.. kernel-doc:: sound/core/info.c
- :export:
-
-.. kernel-doc:: sound/core/rawmidi.c
- :export:
-
-.. kernel-doc:: sound/core/sound.c
- :export:
-
-.. kernel-doc:: sound/core/memory.c
- :export:
-
-.. kernel-doc:: sound/core/pcm_memory.c
- :export:
-
-.. kernel-doc:: sound/core/init.c
- :export:
-
-.. kernel-doc:: sound/core/isadma.c
- :export:
-
-.. kernel-doc:: sound/core/control.c
- :export:
-
-.. kernel-doc:: sound/core/pcm_lib.c
- :export:
-
-.. kernel-doc:: sound/core/hwdep.c
- :export:
-
-.. kernel-doc:: sound/core/pcm_native.c
- :export:
-
-.. kernel-doc:: sound/core/memalloc.c
- :export:
-
diff --git a/Documentation/driver-api/target.rst b/Documentation/driver-api/target.rst
index 620ec6173a93..c70ca25171c0 100644
--- a/Documentation/driver-api/target.rst
+++ b/Documentation/driver-api/target.rst
@@ -41,18 +41,6 @@ iSCSI boot information
.. kernel-doc:: drivers/scsi/iscsi_boot_sysfs.c
:export:
-
-iSCSI transport class
-=====================
-
-The file drivers/scsi/scsi_transport_iscsi.c defines transport
-attributes for the iSCSI class, which sends SCSI packets over TCP/IP
-connections.
-
-.. kernel-doc:: drivers/scsi/scsi_transport_iscsi.c
- :export:
-
-
iSCSI TCP interfaces
====================
diff --git a/Documentation/driver-api/usb/URB.rst b/Documentation/driver-api/usb/URB.rst
index 1e4abc896a0d..a182c0f5e38a 100644
--- a/Documentation/driver-api/usb/URB.rst
+++ b/Documentation/driver-api/usb/URB.rst
@@ -47,7 +47,7 @@ called USB Request Block, or URB for short.
The URB structure
=================
-Some of the fields in struct :c:type:`urb` are::
+Some of the fields in struct urb are::
struct urb
{
diff --git a/Documentation/driver-api/usb/gadget.rst b/Documentation/driver-api/usb/gadget.rst
index 3e8a3809c0b8..09396edd6131 100644
--- a/Documentation/driver-api/usb/gadget.rst
+++ b/Documentation/driver-api/usb/gadget.rst
@@ -176,9 +176,9 @@ Kernel Mode Gadget API
Gadget drivers declare themselves through a struct
:c:type:`usb_gadget_driver`, which is responsible for most parts of enumeration
-for a struct :c:type:`usb_gadget`. The response to a set_configuration usually
-involves enabling one or more of the struct :c:type:`usb_ep` objects exposed by
-the gadget, and submitting one or more struct :c:type:`usb_request` buffers to
+for a struct usb_gadget. The response to a set_configuration usually
+involves enabling one or more of the struct usb_ep objects exposed by
+the gadget, and submitting one or more struct usb_request buffers to
transfer data. Understand those four data types, and their operations,
and you will understand how this API works.
@@ -339,8 +339,8 @@ multi-configuration devices (also more than one function, but not
necessarily sharing a given configuration). There is however an optional
framework which makes it easier to reuse and combine functions.
-Devices using this framework provide a struct :c:type:`usb_composite_driver`,
-which in turn provides one or more struct :c:type:`usb_configuration`
+Devices using this framework provide a struct usb_composite_driver,
+which in turn provides one or more struct usb_configuration
instances. Each such configuration includes at least one struct
:c:type:`usb_function`, which packages a user visible role such as "network
link" or "mass storage device". Management functions may also exist,
diff --git a/Documentation/driver-api/usb/hotplug.rst b/Documentation/driver-api/usb/hotplug.rst
index 79663e653ca1..c1e13107c50e 100644
--- a/Documentation/driver-api/usb/hotplug.rst
+++ b/Documentation/driver-api/usb/hotplug.rst
@@ -122,7 +122,7 @@ and their quirks, might have a MODULE_DEVICE_TABLE like this::
Most USB device drivers should pass these tables to the USB subsystem as
well as to the module management subsystem. Not all, though: some driver
frameworks connect using interfaces layered over USB, and so they won't
-need such a struct :c:type:`usb_driver`.
+need such a struct usb_driver.
Drivers that connect directly to the USB subsystem should be declared
something like this::
diff --git a/Documentation/driver-api/usb/typec_bus.rst b/Documentation/driver-api/usb/typec_bus.rst
index 03dfa9c018b7..21c890ae17e5 100644
--- a/Documentation/driver-api/usb/typec_bus.rst
+++ b/Documentation/driver-api/usb/typec_bus.rst
@@ -91,10 +91,16 @@ their control.
Driver API
----------
+Alternate mode structs
+~~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: include/linux/usb/typec_altmode.h
+ :functions: typec_altmode_driver typec_altmode_ops
+
Alternate mode driver registering/unregistering
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-.. kernel-doc:: drivers/usb/typec/bus.c
+.. kernel-doc:: include/linux/usb/typec_altmode.h
:functions: typec_altmode_register_driver typec_altmode_unregister_driver
Alternate mode driver operations
diff --git a/Documentation/fault-injection/fault-injection.rst b/Documentation/fault-injection/fault-injection.rst
index f850ad018b70..31ecfe44e5b4 100644
--- a/Documentation/fault-injection/fault-injection.rst
+++ b/Documentation/fault-injection/fault-injection.rst
@@ -16,6 +16,10 @@ Available fault injection capabilities
injects page allocation failures. (alloc_pages(), get_free_pages(), ...)
+- fail_usercopy
+
+ injects failures in user memory access functions. (copy_from_user(), get_user(), ...)
+
- fail_futex
injects futex deadlock and uaddr fault errors.
@@ -177,6 +181,7 @@ use the boot option::
failslab=
fail_page_alloc=
+ fail_usercopy=
fail_make_request=
fail_futex=
mmc_core.fail_request=<interval>,<probability>,<space>,<times>
@@ -222,7 +227,7 @@ How to add new fault injection capability
- debugfs entries
- failslab, fail_page_alloc, and fail_make_request use this way.
+ failslab, fail_page_alloc, fail_usercopy, and fail_make_request use this way.
Helper functions:
fault_create_debugfs_attr(name, parent, attr);
diff --git a/Documentation/features/vm/ioremap_prot/arch-support.txt b/Documentation/features/vm/ioremap_prot/arch-support.txt
index 1cb7406cd858..b5fb37c28cc6 100644
--- a/Documentation/features/vm/ioremap_prot/arch-support.txt
+++ b/Documentation/features/vm/ioremap_prot/arch-support.txt
@@ -24,7 +24,7 @@
| parisc: | TODO |
| powerpc: | ok |
| riscv: | TODO |
- | s390: | TODO |
+ | s390: | ok |
| sh: | ok |
| sparc: | TODO |
| um: | TODO |
diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index ec8d99703ecb..b8ee761c9922 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -127,14 +127,14 @@ active_logs=%u Support configuring the number of active logs. In the
current design, f2fs supports only 2, 4, and 6 logs.
Default number is 6.
disable_ext_identify Disable the extension list configured by mkfs, so f2fs
- does not aware of cold files such as media files.
+ is not aware of cold files such as media files.
inline_xattr Enable the inline xattrs feature.
noinline_xattr Disable the inline xattrs feature.
inline_xattr_size=%u Support configuring inline xattr size, it depends on
flexible inline xattr feature.
-inline_data Enable the inline data feature: New created small(<~3.4k)
+inline_data Enable the inline data feature: Newly created small (<~3.4k)
files can be written into inode block.
-inline_dentry Enable the inline dir feature: data in new created
+inline_dentry Enable the inline dir feature: data in newly created
directory entries can be written into inode block. The
space of inode block which is used to store inline
dentries is limited to ~3.4k.
@@ -203,9 +203,9 @@ usrjquota=<file> Appoint specified file and type during mount, so that quota
grpjquota=<file> information can be properly updated during recovery flow,
prjjquota=<file> <quota file>: must be in root directory;
jqfmt=<quota type> <quota type>: [vfsold,vfsv0,vfsv1].
-offusrjquota Turn off user journelled quota.
-offgrpjquota Turn off group journelled quota.
-offprjjquota Turn off project journelled quota.
+offusrjquota Turn off user journalled quota.
+offgrpjquota Turn off group journalled quota.
+offprjjquota Turn off project journalled quota.
quota Enable plain user disk quota accounting.
noquota Disable all plain disk quota option.
whint_mode=%s Control which write hints are passed down to block
@@ -266,6 +266,8 @@ inlinecrypt When possible, encrypt/decrypt the contents of encrypted
inline encryption hardware. The on-disk format is
unaffected. For more details, see
Documentation/block/inline-encryption.rst.
+atgc Enable age-threshold garbage collection, it provides high
+ effectiveness and efficiency on background GC.
======================== ============================================================
Debugfs Entries
@@ -301,7 +303,7 @@ Usage
# insmod f2fs.ko
-3. Create a directory trying to mount::
+3. Create a directory to use when mounting::
# mkdir /mnt/f2fs
@@ -315,7 +317,7 @@ mkfs.f2fs
The mkfs.f2fs is for the use of formatting a partition as the f2fs filesystem,
which builds a basic on-disk layout.
-The options consist of:
+The quick options consist of:
=============== ===========================================================
``-l [label]`` Give a volume label, up to 512 unicode name.
@@ -337,6 +339,8 @@ The options consist of:
1 is set by default, which conducts discard.
=============== ===========================================================
+Note: please refer to the manpage of mkfs.f2fs(8) to get full option list.
+
fsck.f2fs
---------
The fsck.f2fs is a tool to check the consistency of an f2fs-formatted
@@ -344,10 +348,12 @@ partition, which examines whether the filesystem metadata and user-made data
are cross-referenced correctly or not.
Note that, initial version of the tool does not fix any inconsistency.
-The options consist of::
+The quick options consist of::
-d debug level [default:0]
+Note: please refer to the manpage of fsck.f2fs(8) to get full option list.
+
dump.f2fs
---------
The dump.f2fs shows the information of specific inode and dumps SSA and SIT to
@@ -371,6 +377,37 @@ Examples::
# dump.f2fs -s 0~-1 /dev/sdx (SIT dump)
# dump.f2fs -a 0~-1 /dev/sdx (SSA dump)
+Note: please refer to the manpage of dump.f2fs(8) to get full option list.
+
+sload.f2fs
+----------
+The sload.f2fs gives a way to insert files and directories in the exisiting disk
+image. This tool is useful when building f2fs images given compiled files.
+
+Note: please refer to the manpage of sload.f2fs(8) to get full option list.
+
+resize.f2fs
+-----------
+The resize.f2fs lets a user resize the f2fs-formatted disk image, while preserving
+all the files and directories stored in the image.
+
+Note: please refer to the manpage of resize.f2fs(8) to get full option list.
+
+defrag.f2fs
+-----------
+The defrag.f2fs can be used to defragment scattered written data as well as
+filesystem metadata across the disk. This can improve the write speed by giving
+more free consecutive space.
+
+Note: please refer to the manpage of defrag.f2fs(8) to get full option list.
+
+f2fs_io
+-------
+The f2fs_io is a simple tool to issue various filesystem APIs as well as
+f2fs-specific ones, which is very useful for QA tests.
+
+Note: please refer to the manpage of f2fs_io(8) to get full option list.
+
Design
======
@@ -383,7 +420,7 @@ consists of a set of sections. By default, section and zone sizes are set to one
segment size identically, but users can easily modify the sizes by mkfs.
F2FS splits the entire volume into six areas, and all the areas except superblock
-consists of multiple segments as described below::
+consist of multiple segments as described below::
align with the zone size <-|
|-> align with the segment size
@@ -486,7 +523,7 @@ one inode block (i.e., a file) covers::
`- direct node (1018)
`- data (1018)
-Note that, all the node blocks are mapped by NAT which means the location of
+Note that all the node blocks are mapped by NAT which means the location of
each node is translated by the NAT table. In the consideration of the wandering
tree problem, F2FS is able to cut off the propagation of node updates caused by
leaf data writes.
@@ -566,7 +603,7 @@ When F2FS finds a file name in a directory, at first a hash value of the file
name is calculated. Then, F2FS scans the hash table in level #0 to find the
dentry consisting of the file name and its inode number. If not found, F2FS
scans the next hash table in level #1. In this way, F2FS scans hash tables in
-each levels incrementally from 1 to N. In each levels F2FS needs to scan only
+each levels incrementally from 1 to N. In each level F2FS needs to scan only
one bucket determined by the following equation, which shows O(log(# of files))
complexity::
@@ -707,7 +744,7 @@ WRITE_LIFE_LONG " WRITE_LIFE_LONG
Fallocate(2) Policy
-------------------
-The default policy follows the below posix rule.
+The default policy follows the below POSIX rule.
Allocating disk space
The default operation (i.e., mode is zero) of fallocate() allocates
@@ -720,7 +757,7 @@ Allocating disk space
as a method of optimally implementing that function.
However, once F2FS receives ioctl(fd, F2FS_IOC_SET_PIN_FILE) in prior to
-fallocate(fd, DEFAULT_MODE), it allocates on-disk blocks addressess having
+fallocate(fd, DEFAULT_MODE), it allocates on-disk block addressess having
zero or random data, which is useful to the below scenario where:
1. create(fd)
@@ -739,7 +776,7 @@ Compression implementation
cluster can be compressed or not.
- In cluster metadata layout, one special block address is used to indicate
- cluster is compressed one or normal one, for compressed cluster, following
+ a cluster is a compressed one or normal one; for compressed cluster, following
metadata maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs
stores data including compress header and compressed data.
@@ -772,3 +809,18 @@ Compress metadata layout::
+-------------+-------------+----------+----------------------------+
| data length | data chksum | reserved | compressed data |
+-------------+-------------+----------+----------------------------+
+
+NVMe Zoned Namespace devices
+----------------------------
+
+- ZNS defines a per-zone capacity which can be equal or less than the
+ zone-size. Zone-capacity is the number of usable blocks in the zone.
+ F2FS checks if zone-capacity is less than zone-size, if it is, then any
+ segment which starts after the zone-capacity is marked as not-free in
+ the free segment bitmap at initial mount time. These segments are marked
+ as permanently used so they are not allocated for writes and
+ consequently are not needed to be garbage collected. In case the
+ zone-capacity is not aligned to default segment size(2MB), then a segment
+ can start before the zone-capacity and span across zone-capacity boundary.
+ Such spanning segments are also considered as usable segments. All blocks
+ past the zone-capacity are considered unusable in these segments.
diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index 423c5a0daf45..44b67ebd6e40 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -436,9 +436,9 @@ FS_IOC_SET_ENCRYPTION_POLICY
The FS_IOC_SET_ENCRYPTION_POLICY ioctl sets an encryption policy on an
empty directory or verifies that a directory or regular file already
-has the specified encryption policy. It takes in a pointer to a
-:c:type:`struct fscrypt_policy_v1` or a :c:type:`struct
-fscrypt_policy_v2`, defined as follows::
+has the specified encryption policy. It takes in a pointer to
+struct fscrypt_policy_v1 or struct fscrypt_policy_v2, defined as
+follows::
#define FSCRYPT_POLICY_V1 0
#define FSCRYPT_KEY_DESCRIPTOR_SIZE 8
@@ -464,11 +464,11 @@ fscrypt_policy_v2`, defined as follows::
This structure must be initialized as follows:
-- ``version`` must be FSCRYPT_POLICY_V1 (0) if the struct is
- :c:type:`fscrypt_policy_v1` or FSCRYPT_POLICY_V2 (2) if the struct
- is :c:type:`fscrypt_policy_v2`. (Note: we refer to the original
- policy version as "v1", though its version code is really 0.) For
- new encrypted directories, use v2 policies.
+- ``version`` must be FSCRYPT_POLICY_V1 (0) if
+ struct fscrypt_policy_v1 is used or FSCRYPT_POLICY_V2 (2) if
+ struct fscrypt_policy_v2 is used. (Note: we refer to the original
+ policy version as "v1", though its version code is really 0.)
+ For new encrypted directories, use v2 policies.
- ``contents_encryption_mode`` and ``filenames_encryption_mode`` must
be set to constants from ``<linux/fscrypt.h>`` which identify the
@@ -508,9 +508,9 @@ This structure must be initialized as follows:
replaced with ``master_key_identifier``, which is longer and cannot
be arbitrarily chosen. Instead, the key must first be added using
`FS_IOC_ADD_ENCRYPTION_KEY`_. Then, the ``key_spec.u.identifier``
- the kernel returned in the :c:type:`struct fscrypt_add_key_arg` must
- be used as the ``master_key_identifier`` in the :c:type:`struct
- fscrypt_policy_v2`.
+ the kernel returned in the struct fscrypt_add_key_arg must
+ be used as the ``master_key_identifier`` in
+ struct fscrypt_policy_v2.
If the file is not yet encrypted, then FS_IOC_SET_ENCRYPTION_POLICY
verifies that the file is an empty directory. If so, the specified
@@ -590,7 +590,7 @@ FS_IOC_GET_ENCRYPTION_POLICY_EX
The FS_IOC_GET_ENCRYPTION_POLICY_EX ioctl retrieves the encryption
policy, if any, for a directory or regular file. No additional
permissions are required beyond the ability to open the file. It
-takes in a pointer to a :c:type:`struct fscrypt_get_policy_ex_arg`,
+takes in a pointer to struct fscrypt_get_policy_ex_arg,
defined as follows::
struct fscrypt_get_policy_ex_arg {
@@ -637,9 +637,8 @@ The FS_IOC_GET_ENCRYPTION_POLICY ioctl can also retrieve the
encryption policy, if any, for a directory or regular file. However,
unlike `FS_IOC_GET_ENCRYPTION_POLICY_EX`_,
FS_IOC_GET_ENCRYPTION_POLICY only supports the original policy
-version. It takes in a pointer directly to a :c:type:`struct
-fscrypt_policy_v1` rather than a :c:type:`struct
-fscrypt_get_policy_ex_arg`.
+version. It takes in a pointer directly to struct fscrypt_policy_v1
+rather than struct fscrypt_get_policy_ex_arg.
The error codes for FS_IOC_GET_ENCRYPTION_POLICY are the same as those
for FS_IOC_GET_ENCRYPTION_POLICY_EX, except that
@@ -680,8 +679,7 @@ the filesystem, making all files on the filesystem which were
encrypted using that key appear "unlocked", i.e. in plaintext form.
It can be executed on any file or directory on the target filesystem,
but using the filesystem's root directory is recommended. It takes in
-a pointer to a :c:type:`struct fscrypt_add_key_arg`, defined as
-follows::
+a pointer to struct fscrypt_add_key_arg, defined as follows::
struct fscrypt_add_key_arg {
struct fscrypt_key_specifier key_spec;
@@ -710,17 +708,16 @@ follows::
__u8 raw[];
};
-:c:type:`struct fscrypt_add_key_arg` must be zeroed, then initialized
+struct fscrypt_add_key_arg must be zeroed, then initialized
as follows:
- If the key is being added for use by v1 encryption policies, then
``key_spec.type`` must contain FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR, and
``key_spec.u.descriptor`` must contain the descriptor of the key
being added, corresponding to the value in the
- ``master_key_descriptor`` field of :c:type:`struct
- fscrypt_policy_v1`. To add this type of key, the calling process
- must have the CAP_SYS_ADMIN capability in the initial user
- namespace.
+ ``master_key_descriptor`` field of struct fscrypt_policy_v1.
+ To add this type of key, the calling process must have the
+ CAP_SYS_ADMIN capability in the initial user namespace.
Alternatively, if the key is being added for use by v2 encryption
policies, then ``key_spec.type`` must contain
@@ -737,12 +734,13 @@ as follows:
- ``key_id`` is 0 if the raw key is given directly in the ``raw``
field. Otherwise ``key_id`` is the ID of a Linux keyring key of
- type "fscrypt-provisioning" whose payload is a :c:type:`struct
- fscrypt_provisioning_key_payload` whose ``raw`` field contains the
- raw key and whose ``type`` field matches ``key_spec.type``. Since
- ``raw`` is variable-length, the total size of this key's payload
- must be ``sizeof(struct fscrypt_provisioning_key_payload)`` plus the
- raw key size. The process must have Search permission on this key.
+ type "fscrypt-provisioning" whose payload is
+ struct fscrypt_provisioning_key_payload whose ``raw`` field contains
+ the raw key and whose ``type`` field matches ``key_spec.type``.
+ Since ``raw`` is variable-length, the total size of this key's
+ payload must be ``sizeof(struct fscrypt_provisioning_key_payload)``
+ plus the raw key size. The process must have Search permission on
+ this key.
Most users should leave this 0 and specify the raw key directly.
The support for specifying a Linux keyring key is intended mainly to
@@ -860,8 +858,8 @@ The FS_IOC_REMOVE_ENCRYPTION_KEY ioctl removes a claim to a master
encryption key from the filesystem, and possibly removes the key
itself. It can be executed on any file or directory on the target
filesystem, but using the filesystem's root directory is recommended.
-It takes in a pointer to a :c:type:`struct fscrypt_remove_key_arg`,
-defined as follows::
+It takes in a pointer to struct fscrypt_remove_key_arg, defined
+as follows::
struct fscrypt_remove_key_arg {
struct fscrypt_key_specifier key_spec;
@@ -956,8 +954,8 @@ FS_IOC_GET_ENCRYPTION_KEY_STATUS
The FS_IOC_GET_ENCRYPTION_KEY_STATUS ioctl retrieves the status of a
master encryption key. It can be executed on any file or directory on
the target filesystem, but using the filesystem's root directory is
-recommended. It takes in a pointer to a :c:type:`struct
-fscrypt_get_key_status_arg`, defined as follows::
+recommended. It takes in a pointer to
+struct fscrypt_get_key_status_arg, defined as follows::
struct fscrypt_get_key_status_arg {
/* input */
@@ -1148,10 +1146,10 @@ Implementation details
Encryption context
------------------
-An encryption policy is represented on-disk by a :c:type:`struct
-fscrypt_context_v1` or a :c:type:`struct fscrypt_context_v2`. It is
-up to individual filesystems to decide where to store it, but normally
-it would be stored in a hidden extended attribute. It should *not* be
+An encryption policy is represented on-disk by
+struct fscrypt_context_v1 or struct fscrypt_context_v2. It is up to
+individual filesystems to decide where to store it, but normally it
+would be stored in a hidden extended attribute. It should *not* be
exposed by the xattr-related system calls such as getxattr() and
setxattr() because of the special semantics of the encryption xattr.
(In particular, there would be much confusion if an encryption policy
@@ -1249,8 +1247,8 @@ a strong "hash" of the ciphertext filename, along with the optional
filesystem-specific hash(es) needed for directory lookups. This
allows the filesystem to still, with a high degree of confidence, map
the filename given in ->lookup() back to a particular directory entry
-that was previously listed by readdir(). See :c:type:`struct
-fscrypt_nokey_name` in the source for more details.
+that was previously listed by readdir(). See
+struct fscrypt_nokey_name in the source for more details.
Note that the precise way that filenames are presented to userspace
without the key is subject to change in the future. It is only meant
diff --git a/Documentation/filesystems/fsverity.rst b/Documentation/filesystems/fsverity.rst
index 6c8944f6f0f7..895e9711ed88 100644
--- a/Documentation/filesystems/fsverity.rst
+++ b/Documentation/filesystems/fsverity.rst
@@ -84,7 +84,7 @@ FS_IOC_ENABLE_VERITY
--------------------
The FS_IOC_ENABLE_VERITY ioctl enables fs-verity on a file. It takes
-in a pointer to a :c:type:`struct fsverity_enable_arg`, defined as
+in a pointer to a struct fsverity_enable_arg, defined as
follows::
struct fsverity_enable_arg {
diff --git a/Documentation/filesystems/fuse.rst b/Documentation/filesystems/fuse.rst
index cd717f9bf940..8120c3c0cb4e 100644
--- a/Documentation/filesystems/fuse.rst
+++ b/Documentation/filesystems/fuse.rst
@@ -47,7 +47,7 @@ filesystems. A good example is sshfs: a secure network filesystem
using the sftp protocol.
The userspace library and utilities are available from the
-`FUSE homepage: <http://fuse.sourceforge.net/>`_
+`FUSE homepage: <https://github.com/libfuse/>`_
Filesystem type
===============
diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst
index 8ea83a51c266..580ab9a0fe31 100644
--- a/Documentation/filesystems/overlayfs.rst
+++ b/Documentation/filesystems/overlayfs.rst
@@ -564,6 +564,25 @@ Note: the mount options index=off,nfs_export=on are conflicting for a
read-write mount and will result in an error.
+Volatile mount
+--------------
+
+This is enabled with the "volatile" mount option. Volatile mounts are not
+guaranteed to survive a crash. It is strongly recommended that volatile
+mounts are only used if data written to the overlay can be recreated
+without significant effort.
+
+The advantage of mounting with the "volatile" option is that all forms of
+sync calls to the upper filesystem are omitted.
+
+When overlay is mounted with "volatile" option, the directory
+"$workdir/work/incompat/volatile" is created. During next mount, overlay
+checks for this directory and refuses to mount if present. This is a strong
+indicator that user should throw away upper and work directories and create
+fresh one. In very limited cases where the user knows that the system has
+not crashed and contents of upperdir are intact, The "volatile" directory
+can be removed.
+
Testsuite
---------
diff --git a/Documentation/filesystems/zonefs.rst b/Documentation/filesystems/zonefs.rst
index 6c18bc8ce332..6b213fe9a33e 100644
--- a/Documentation/filesystems/zonefs.rst
+++ b/Documentation/filesystems/zonefs.rst
@@ -326,6 +326,21 @@ discover the amount of data that has been written to the zone. In the case of a
read-only zone discovered at run-time, as indicated in the previous section.
The size of the zone file is left unchanged from its last updated value.
+A zoned block device (e.g. an NVMe Zoned Namespace device) may have limits on
+the number of zones that can be active, that is, zones that are in the
+implicit open, explicit open or closed conditions. This potential limitation
+translates into a risk for applications to see write IO errors due to this
+limit being exceeded if the zone of a file is not already active when a write
+request is issued by the user.
+
+To avoid these potential errors, the "explicit-open" mount option forces zones
+to be made active using an open zone command when a file is opened for writing
+for the first time. If the zone open command succeeds, the application is then
+guaranteed that write requests can be processed. Conversely, the
+"explicit-open" mount option will result in a zone close command being issued
+to the device on the last close() of a zone file if the zone is not full nor
+empty.
+
Zonefs User Space Tools
=======================
diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst
index 57047dcb8d19..1f9ea8221f80 100644
--- a/Documentation/gpu/amdgpu.rst
+++ b/Documentation/gpu/amdgpu.rst
@@ -206,8 +206,8 @@ pp_power_profile_mode
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: pp_power_profile_mode
-*_busy_percent
-~~~~~~~~~~~~~~
+\*_busy_percent
+~~~~~~~~~~~~~~~
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: gpu_busy_percent
diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index 33cc6ddf8f64..cff1f154b473 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -636,15 +636,36 @@ i915 Perf Observation Architecture Stream
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
:functions: i915_oa_poll_wait
-All i915 Perf Internals
------------------------
+Other i915 Perf Internals
+-------------------------
-This section simply includes all currently documented i915 perf internals, in
-no particular order, but may include some more minor utilities or platform
+This section simply includes all other currently documented i915 perf internals,
+in no particular order, but may include some more minor utilities or platform
specific details than found in the more high-level sections.
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
:internal:
+ :no-identifiers:
+ i915_perf_init
+ i915_perf_fini
+ i915_perf_register
+ i915_perf_unregister
+ i915_perf_open_ioctl
+ i915_perf_release
+ i915_perf_add_config_ioctl
+ i915_perf_remove_config_ioctl
+ read_properties_unlocked
+ i915_perf_open_ioctl_locked
+ i915_perf_destroy_locked
+ i915_perf_read i915_perf_ioctl
+ i915_perf_enable_locked
+ i915_perf_disable_locked
+ i915_perf_poll i915_perf_poll_locked
+ i915_oa_stream_init i915_oa_read
+ i915_oa_stream_enable
+ i915_oa_stream_disable
+ i915_oa_wait_unlocked
+ i915_oa_poll_wait
Style
=====
diff --git a/Documentation/networking/ieee802154.rst b/Documentation/networking/ieee802154.rst
index 6f4bf8447a21..f27856d77c8b 100644
--- a/Documentation/networking/ieee802154.rst
+++ b/Documentation/networking/ieee802154.rst
@@ -26,7 +26,9 @@ The stack is composed of three main parts:
Socket API
==========
-.. c:function:: int sd = socket(PF_IEEE802154, SOCK_DGRAM, 0);
+::
+
+ int sd = socket(PF_IEEE802154, SOCK_DGRAM, 0);
The address family, socket addresses etc. are defined in the
include/net/af_ieee802154.h header or in the special header
@@ -131,12 +133,12 @@ Register PHY in the system.
Freeing registered PHY.
-.. c:function:: void ieee802154_rx_irqsafe(struct ieee802154_hw *hw, struct sk_buff *skb, u8 lqi):
+.. c:function:: void ieee802154_rx_irqsafe(struct ieee802154_hw *hw, struct sk_buff *skb, u8 lqi)
Telling 802.15.4 module there is a new received frame in the skb with
the RF Link Quality Indicator (LQI) from the hardware device.
-.. c:function:: void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb, bool ifs_handling):
+.. c:function:: void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb, bool ifs_handling)
Telling 802.15.4 module the frame in the skb is or going to be
transmitted through the hardware device
@@ -155,25 +157,25 @@ operations structure at least::
...
};
-.. c:function:: int start(struct ieee802154_hw *hw):
+.. c:function:: int start(struct ieee802154_hw *hw)
Handler that 802.15.4 module calls for the hardware device initialization.
-.. c:function:: void stop(struct ieee802154_hw *hw):
+.. c:function:: void stop(struct ieee802154_hw *hw)
Handler that 802.15.4 module calls for the hardware device cleanup.
-.. c:function:: int xmit_async(struct ieee802154_hw *hw, struct sk_buff *skb):
+.. c:function:: int xmit_async(struct ieee802154_hw *hw, struct sk_buff *skb)
Handler that 802.15.4 module calls for each frame in the skb going to be
transmitted through the hardware device.
-.. c:function:: int ed(struct ieee802154_hw *hw, u8 *level):
+.. c:function:: int ed(struct ieee802154_hw *hw, u8 *level)
Handler that 802.15.4 module calls for Energy Detection from the hardware
device.
-.. c:function:: int set_channel(struct ieee802154_hw *hw, u8 page, u8 channel):
+.. c:function:: int set_channel(struct ieee802154_hw *hw, u8 page, u8 channel)
Set radio for listening on specific channel of the hardware device.
diff --git a/Documentation/powerpc/isa-versions.rst b/Documentation/powerpc/isa-versions.rst
index a363d8c1603c..dfcb1097dce4 100644
--- a/Documentation/powerpc/isa-versions.rst
+++ b/Documentation/powerpc/isa-versions.rst
@@ -7,6 +7,7 @@ Mapping of some CPU versions to relevant ISA versions.
========= ====================================================================
CPU Architecture version
========= ====================================================================
+Power10 Power ISA v3.1
Power9 Power ISA v3.0B
Power8 Power ISA v2.07
Power7 Power ISA v2.06
@@ -32,6 +33,7 @@ Key Features
========== ==================
CPU VMX (aka. Altivec)
========== ==================
+Power10 Yes
Power9 Yes
Power8 Yes
Power7 Yes
@@ -47,6 +49,7 @@ PPC970 Yes
========== ====
CPU VSX
========== ====
+Power10 Yes
Power9 Yes
Power8 Yes
Power7 Yes
@@ -62,6 +65,7 @@ PPC970 No
========== ====================================
CPU Transactional Memory
========== ====================================
+Power10 No (* see Power ISA v3.1, "Appendix A. Notes on the Removal of Transactional Memory from the Architecture")
Power9 Yes (* see transactional_memory.txt)
Power8 Yes
Power7 No
diff --git a/Documentation/powerpc/ptrace.rst b/Documentation/powerpc/ptrace.rst
index 864d4b6dddd1..77725d69eb4a 100644
--- a/Documentation/powerpc/ptrace.rst
+++ b/Documentation/powerpc/ptrace.rst
@@ -46,6 +46,7 @@ features will have bits indicating whether there is support for::
#define PPC_DEBUG_FEATURE_DATA_BP_RANGE 0x4
#define PPC_DEBUG_FEATURE_DATA_BP_MASK 0x8
#define PPC_DEBUG_FEATURE_DATA_BP_DAWR 0x10
+ #define PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 0x20
2. PTRACE_SETHWDEBUG
diff --git a/Documentation/powerpc/syscall64-abi.rst b/Documentation/powerpc/syscall64-abi.rst
index 379817ca64d2..cf9b2857c72a 100644
--- a/Documentation/powerpc/syscall64-abi.rst
+++ b/Documentation/powerpc/syscall64-abi.rst
@@ -49,22 +49,22 @@ Register preservation rules
Register preservation rules match the ELF ABI calling sequence with the
following differences:
---- For the sc instruction, differences with the ELF ABI ---
-=========== ============= ========================================
-r0 Volatile (System call number.)
-r3 Volatile (Parameter 1, and return value.)
-r4-r8 Volatile (Parameters 2-6.)
-cr0 Volatile (cr0.SO is the return error condition.)
-cr1, cr5-7 Nonvolatile
-lr Nonvolatile
-=========== ============= ========================================
-
---- For the scv 0 instruction, differences with the ELF ABI ---
-=========== ============= ========================================
-r0 Volatile (System call number.)
-r3 Volatile (Parameter 1, and return value.)
-r4-r8 Volatile (Parameters 2-6.)
-=========== ============= ========================================
++------------------------------------------------------------------------+
+| For the sc instruction, differences with the ELF ABI |
++--------------+--------------+------------------------------------------+
+| r0 | Volatile | (System call number.) |
+| rr3 | Volatile | (Parameter 1, and return value.) |
+| rr4-r8 | Volatile | (Parameters 2-6.) |
+| rcr0 | Volatile | (cr0.SO is the return error condition.) |
+| rcr1, cr5-7 | Nonvolatile | |
+| rlr | Nonvolatile | |
++--------------+--------------+------------------------------------------+
+| For the scv 0 instruction, differences with the ELF ABI |
++--------------+--------------+------------------------------------------+
+| r0 | Volatile | (System call number.) |
+| r3 | Volatile | (Parameter 1, and return value.) |
+| r4-r8 | Volatile | (Parameters 2-6.) |
++--------------+--------------+------------------------------------------+
All floating point and vector data registers as well as control and status
registers are nonvolatile.
diff --git a/Documentation/sound/designs/tracepoints.rst b/Documentation/sound/designs/tracepoints.rst
index 78bc5572f829..b0a7e3010187 100644
--- a/Documentation/sound/designs/tracepoints.rst
+++ b/Documentation/sound/designs/tracepoints.rst
@@ -34,20 +34,20 @@ substream. In this procedure, PCM hardware parameters are decided by
interaction between applications and ALSA PCM core. Once decided, runtime of
the PCM substream keeps the parameters.
-The parameters are described in :c:type:`struct snd_pcm_hw_params`. This
+The parameters are described in struct snd_pcm_hw_params. This
structure includes several types of parameters. Applications set preferable
value to these parameters, then execute ioctl(2) with SNDRV_PCM_IOCTL_HW_REFINE
or SNDRV_PCM_IOCTL_HW_PARAMS. The former is used just for refining available
set of parameters. The latter is used for an actual decision of the parameters.
-The :c:type:`struct snd_pcm_hw_params` structure has below members:
+The struct snd_pcm_hw_params structure has below members:
``flags``
Configurable. ALSA PCM core and some drivers handle this flag to select
convenient parameters or change their behaviour.
``masks``
Configurable. This type of parameter is described in
- :c:type:`struct snd_mask` and represent mask values. As of PCM protocol
+ struct snd_mask and represent mask values. As of PCM protocol
v2.0.13, three types are defined.
- SNDRV_PCM_HW_PARAM_ACCESS
@@ -55,7 +55,7 @@ The :c:type:`struct snd_pcm_hw_params` structure has below members:
- SNDRV_PCM_HW_PARAM_SUBFORMAT
``intervals``
Configurable. This type of parameter is described in
- :c:type:`struct snd_interval` and represent values with a range. As of
+ struct snd_interval and represent values with a range. As of
PCM protocol v2.0.13, twelve types are defined.
- SNDRV_PCM_HW_PARAM_SAMPLE_BITS
@@ -78,7 +78,7 @@ The :c:type:`struct snd_pcm_hw_params` structure has below members:
are going to be changed.
``cmask``
Read-only. After returning from ioctl(2), buffer in user space for
- :c:type:`struct snd_pcm_hw_params` includes result of each operation.
+ struct snd_pcm_hw_params includes result of each operation.
This mask represents which mask/interval parameter is actually changed.
``info``
Read-only. This represents hardware/driver capabilities as bit flags
@@ -110,10 +110,10 @@ The :c:type:`struct snd_pcm_hw_params` structure has below members:
value to this parameter but some drivers intentionally set zero with
a care of hardware design or data transmission protocol.
-ALSA PCM core handles buffer of :c:type:`struct snd_pcm_hw_params` when
+ALSA PCM core handles buffer of struct snd_pcm_hw_params when
applications execute ioctl(2) with SNDRV_PCM_HW_REFINE or SNDRV_PCM_HW_PARAMS.
Parameters in the buffer are changed according to
-:c:type:`struct snd_pcm_hardware` and rules of constraints in the runtime. The
+struct snd_pcm_hardware and rules of constraints in the runtime. The
structure describes capabilities of handled hardware. The rules describes
dependencies on which a parameter is decided according to several parameters.
A rule has a callback function, and drivers can register arbitrary functions
@@ -121,17 +121,17 @@ to compute the target parameter. ALSA PCM core registers some rules to the
runtime as a default.
Each driver can join in the interaction as long as it prepared for two stuffs
-in a callback of :c:type:`struct snd_pcm_ops.open`.
+in a callback of struct snd_pcm_ops.open.
1. In the callback, drivers are expected to change a member of
- :c:type:`struct snd_pcm_hardware` type in the runtime, according to
+ struct snd_pcm_hardware type in the runtime, according to
capacities of corresponding hardware.
2. In the same callback, drivers are also expected to register additional rules
of constraints into the runtime when several parameters have dependencies
due to hardware design.
The driver can refers to result of the interaction in a callback of
-:c:type:`struct snd_pcm_ops.hw_params`, however it should not change the
+struct snd_pcm_ops.hw_params, however it should not change the
content.
Tracepoints in this category are designed to trace changes of the
@@ -163,7 +163,7 @@ fields are different according to type of the parameter. For parameters of mask
type, the fields represent hexadecimal dump of content of the parameter. For
parameters of interval type, the fields represent values of each member of
``empty``, ``integer``, ``openmin``, ``min``, ``max``, ``openmax`` in
-:c:type:`struct snd_interval` in this order.
+struct snd_interval in this order.
Tracepoints in drivers
======================
diff --git a/Documentation/sound/kernel-api/alsa-driver-api.rst b/Documentation/sound/kernel-api/alsa-driver-api.rst
index c8cc651eccf7..d24c64df7069 100644
--- a/Documentation/sound/kernel-api/alsa-driver-api.rst
+++ b/Documentation/sound/kernel-api/alsa-driver-api.rst
@@ -132,3 +132,4 @@ ISA DMA Helpers
Other Helper Macros
-------------------
.. kernel-doc:: include/sound/core.h
+.. kernel-doc:: sound/sound_core.c
diff --git a/Documentation/sound/kernel-api/writing-an-alsa-driver.rst b/Documentation/sound/kernel-api/writing-an-alsa-driver.rst
index aa9d5ab183d2..73bbd59afc33 100644
--- a/Documentation/sound/kernel-api/writing-an-alsa-driver.rst
+++ b/Documentation/sound/kernel-api/writing-an-alsa-driver.rst
@@ -194,7 +194,7 @@ The minimum flow for PCI soundcards is as follows:
- create ``remove`` callback.
-- create a :c:type:`struct pci_driver <pci_driver>` structure
+- create a struct pci_driver structure
containing the three pointers above.
- create an ``init`` function just calling the
@@ -487,7 +487,7 @@ The destructor, remove callback, simply releases the card instance. Then
the ALSA middle layer will release all the attached components
automatically.
-It would be typically just :c:func:`calling snd_card_free()`:
+It would be typically just calling :c:func:`snd_card_free()`:
::
@@ -560,16 +560,15 @@ return the card instance. The extra_size argument is used to allocate
card->private_data for the chip-specific data. Note that these data are
allocated by :c:func:`snd_card_new()`.
-The first argument, the pointer of struct :c:type:`struct device
-<device>`, specifies the parent device. For PCI devices, typically
-``&pci->`` is passed there.
+The first argument, the pointer of struct device, specifies the parent
+device. For PCI devices, typically ``&pci->`` is passed there.
Components
----------
After the card is created, you can attach the components (devices) to
the card instance. In an ALSA driver, a component is represented as a
-:c:type:`struct snd_device <snd_device>` object. A component
+struct snd_device object. A component
can be a PCM instance, a control interface, a raw MIDI interface, etc.
Each such instance has one component entry.
@@ -628,7 +627,7 @@ argument of :c:func:`snd_card_new()`, i.e.
err = snd_card_new(&pci->dev, index[dev], id[dev], THIS_MODULE,
sizeof(struct mychip), &card);
-:c:type:`struct mychip <mychip>` is the type of the chip record.
+struct mychip is the type of the chip record.
In return, the allocated record can be accessed as
@@ -890,7 +889,7 @@ functions. These resources must be released in the destructor
function (see below).
Now assume that the PCI device has an I/O port with 8 bytes and an
-interrupt. Then :c:type:`struct mychip <mychip>` will have the
+interrupt. Then struct mychip will have the
following fields:
::
@@ -1094,7 +1093,7 @@ PCI Entries
-----------
So far, so good. Let's finish the missing PCI stuff. At first, we need a
-:c:type:`struct pci_device_id <pci_device_id>` table for
+struct pci_device_id table for
this chipset. It's a table of PCI vendor/device ID number, and some
masks.
@@ -1110,19 +1109,17 @@ For example,
};
MODULE_DEVICE_TABLE(pci, snd_mychip_ids);
-The first and second fields of the :c:type:`struct pci_device_id
-<pci_device_id>` structure are the vendor and device IDs. If you
-have no reason to filter the matching devices, you can leave the
-remaining fields as above. The last field of the :c:type:`struct
-pci_device_id <pci_device_id>` struct contains private data
-for this entry. You can specify any value here, for example, to define
-specific operations for supported device IDs. Such an example is found
-in the intel8x0 driver.
+The first and second fields of the struct pci_device_id are the vendor
+and device IDs. If you have no reason to filter the matching devices, you can
+leave the remaining fields as above. The last field of the
+struct pci_device_id contains private data for this entry. You can specify
+any value here, for example, to define specific operations for supported
+device IDs. Such an example is found in the intel8x0 driver.
The last entry of this list is the terminator. You must specify this
all-zero entry.
-Then, prepare the :c:type:`struct pci_driver <pci_driver>`
+Then, prepare the struct pci_driver
record:
::
@@ -1439,8 +1436,8 @@ corresponding argument.
If a chip supports multiple playbacks or captures, you can specify more
numbers, but they must be handled properly in open/close, etc.
callbacks. When you need to know which substream you are referring to,
-then it can be obtained from :c:type:`struct snd_pcm_substream
-<snd_pcm_substream>` data passed to each callback as follows:
+then it can be obtained from struct snd_pcm_substream data passed to each
+callback as follows:
::
@@ -1639,10 +1636,9 @@ In the sections below, important records are explained.
Hardware Description
~~~~~~~~~~~~~~~~~~~~
-The hardware descriptor (:c:type:`struct snd_pcm_hardware
-<snd_pcm_hardware>`) contains the definitions of the fundamental
-hardware configuration. Above all, you'll need to define this in the
-`PCM open callback`_. Note that the runtime instance holds the copy of
+The hardware descriptor (struct snd_pcm_hardware) contains the definitions of
+the fundamental hardware configuration. Above all, you'll need to define this
+in the `PCM open callback`_. Note that the runtime instance holds the copy of
the descriptor, not the pointer to the existing descriptor. That is,
in the open callback, you can modify the copied descriptor
(``runtime->hw``) as you need. For example, if the maximum number of
@@ -1800,14 +1796,13 @@ Running Status
~~~~~~~~~~~~~~
The running status can be referred via ``runtime->status``. This is
-the pointer to the :c:type:`struct snd_pcm_mmap_status
-<snd_pcm_mmap_status>` record. For example, you can get the current
+the pointer to the struct snd_pcm_mmap_status record.
+For example, you can get the current
DMA hardware pointer via ``runtime->status->hw_ptr``.
The DMA application pointer can be referred via ``runtime->control``,
-which points to the :c:type:`struct snd_pcm_mmap_control
-<snd_pcm_mmap_control>` record. However, accessing directly to
-this value is not recommended.
+which points to the struct snd_pcm_mmap_control record.
+However, accessing directly to this value is not recommended.
Private Data
~~~~~~~~~~~~
@@ -1843,8 +1838,8 @@ error number such as ``-EINVAL``. To choose an appropriate error
number, it is advised to check what value other parts of the kernel
return when the same kind of request fails.
-The callback function takes at least the argument with :c:type:`struct
-snd_pcm_substream <snd_pcm_substream>` pointer. To retrieve the chip
+The callback function takes at least the argument with
+struct snd_pcm_substream pointer. To retrieve the chip
record from the given substream instance, you can use the following
macro.
@@ -2313,10 +2308,10 @@ non-atomic contexts. For example, the function
:c:func:`snd_pcm_period_elapsed()` is called typically from the
interrupt handler. But, if you set up the driver to use a threaded
interrupt handler, this call can be in non-atomic context, too. In such
-a case, you can set ``nonatomic`` filed of :c:type:`struct snd_pcm
-<snd_pcm>` object after creating it. When this flag is set, mutex
-and rwsem are used internally in the PCM core instead of spin and
-rwlocks, so that you can call all PCM functions safely in a non-atomic
+a case, you can set ``nonatomic`` filed of struct snd_pcm object
+after creating it. When this flag is set, mutex and rwsem are used internally
+in the PCM core instead of spin and rwlocks, so that you can call all PCM
+functions safely in a non-atomic
context.
Constraints
@@ -2357,8 +2352,7 @@ There are many different constraints. Look at ``sound/pcm.h`` for a
complete list. You can even define your own constraint rules. For
example, let's suppose my_chip can manage a substream of 1 channel if
and only if the format is ``S16_LE``, otherwise it supports any format
-specified in the :c:type:`struct snd_pcm_hardware
-<snd_pcm_hardware>` structure (or in any other
+specified in struct snd_pcm_hardware> (or in any other
constraint_list). You can build a rule like this:
::
@@ -2467,7 +2461,7 @@ Definition of Controls
To create a new control, you need to define the following three
callbacks: ``info``, ``get`` and ``put``. Then, define a
-:c:type:`struct snd_kcontrol_new <snd_kcontrol_new>` record, such as:
+struct snd_kcontrol_new record, such as:
::
@@ -2602,8 +2596,8 @@ info callback
~~~~~~~~~~~~~
The ``info`` callback is used to get detailed information on this
-control. This must store the values of the given :c:type:`struct
-snd_ctl_elem_info <snd_ctl_elem_info>` object. For example,
+control. This must store the values of the given
+struct snd_ctl_elem_info object. For example,
for a boolean control with a single element:
::
@@ -2774,13 +2768,11 @@ In the simplest way, you can do like this:
if (err < 0)
return err;
-where ``my_control`` is the :c:type:`struct snd_kcontrol_new
-<snd_kcontrol_new>` object defined above, and chip is the object
-pointer to be passed to kcontrol->private_data which can be referred
-to in callbacks.
+where ``my_control`` is the struct snd_kcontrol_new object defined above,
+and chip is the object pointer to be passed to kcontrol->private_data which
+can be referred to in callbacks.
-:c:func:`snd_ctl_new1()` allocates a new :c:type:`struct
-snd_kcontrol <snd_kcontrol>` instance, and
+:c:func:`snd_ctl_new1()` allocates a new struct snd_kcontrol instance, and
:c:func:`snd_ctl_add()` assigns the given control component to the
card.
@@ -2797,10 +2789,9 @@ can call :c:func:`snd_ctl_notify()`. For example,
This function takes the card pointer, the event-mask, and the control id
pointer for the notification. The event-mask specifies the types of
notification, for example, in the above example, the change of control
-values is notified. The id pointer is the pointer of :c:type:`struct
-snd_ctl_elem_id <snd_ctl_elem_id>` to be notified. You can
-find some examples in ``es1938.c`` or ``es1968.c`` for hardware volume
-interrupts.
+values is notified. The id pointer is the pointer of struct snd_ctl_elem_id
+to be notified. You can find some examples in ``es1938.c`` or ``es1968.c``
+for hardware volume interrupts.
Metadata
--------
@@ -2915,9 +2906,8 @@ with an ``ac97_bus_ops_t`` record with callback functions.
The bus record is shared among all belonging ac97 instances.
-And then call :c:func:`snd_ac97_mixer()` with an :c:type:`struct
-snd_ac97_template <snd_ac97_template>` record together with
-the bus pointer created above.
+And then call :c:func:`snd_ac97_mixer()` with an struct snd_ac97_template
+record together with the bus pointer created above.
::
@@ -3118,11 +3108,10 @@ devices on the card, set ``MPU401_INFO_IRQ_HOOK`` (see
Usually, the port address corresponds to the command port and port + 1
corresponds to the data port. If not, you may change the ``cport``
-field of :c:type:`struct snd_mpu401 <snd_mpu401>` manually afterward.
-However, :c:type:`struct snd_mpu401 <snd_mpu401>` pointer is
+field of struct snd_mpu401 manually afterward.
+However, struct snd_mpu401 pointer is
not returned explicitly by :c:func:`snd_mpu401_uart_new()`. You
-need to cast ``rmidi->private_data`` to :c:type:`struct snd_mpu401
-<snd_mpu401>` explicitly,
+need to cast ``rmidi->private_data`` to struct snd_mpu401 explicitly,
::
@@ -3326,8 +3315,7 @@ data and removes them from the buffer at once:
}
If you know beforehand how many bytes you can accept, you can use a
-buffer size greater than one with the
-:c:func:`snd_rawmidi_transmit\*()` functions.
+buffer size greater than one with the ``snd_rawmidi_transmit*()`` functions.
The ``trigger`` callback must not sleep. If the hardware FIFO is full
before the substream buffer has been emptied, you have to continue
@@ -3772,7 +3760,7 @@ For creating the SG-buffer handler, call
:c:func:`snd_pcm_set_managed_buffer_all()` with
``SNDRV_DMA_TYPE_DEV_SG`` in the PCM constructor like other PCI
pre-allocator. You need to pass ``&pci->dev``, where pci is
-the :c:type:`struct pci_dev <pci_dev>` pointer of the chip as
+the struct pci_dev pointer of the chip as
well.
::
@@ -3927,7 +3915,7 @@ the maximum size of the proc file access.
The read/write callbacks of raw mode are more direct than the text mode.
You need to use a low-level I/O functions such as
-:c:func:`copy_from/to_user()` to transfer the data.
+:c:func:`copy_from_user()` and :c:func:`copy_to_user()` to transfer the data.
::
diff --git a/Documentation/sphinx/automarkup.py b/Documentation/sphinx/automarkup.py
index a1b0f554cd82..409dbc4100de 100644
--- a/Documentation/sphinx/automarkup.py
+++ b/Documentation/sphinx/automarkup.py
@@ -22,13 +22,34 @@ from itertools import chain
# :c:func: block (i.e. ":c:func:`mmap()`s" flakes out), so the last
# bit tries to restrict matches to things that won't create trouble.
#
-RE_function = re.compile(r'(([\w_][\w\d_]+)\(\))')
-RE_type = re.compile(r'(struct|union|enum|typedef)\s+([\w_][\w\d_]+)')
+RE_function = re.compile(r'\b(([a-zA-Z_]\w+)\(\))', flags=re.ASCII)
+
+#
+# Sphinx 2 uses the same :c:type role for struct, union, enum and typedef
+#
+RE_generic_type = re.compile(r'\b(struct|union|enum|typedef)\s+([a-zA-Z_]\w+)',
+ flags=re.ASCII)
+
+#
+# Sphinx 3 uses a different C role for each one of struct, union, enum and
+# typedef
+#
+RE_struct = re.compile(r'\b(struct)\s+([a-zA-Z_]\w+)', flags=re.ASCII)
+RE_union = re.compile(r'\b(union)\s+([a-zA-Z_]\w+)', flags=re.ASCII)
+RE_enum = re.compile(r'\b(enum)\s+([a-zA-Z_]\w+)', flags=re.ASCII)
+RE_typedef = re.compile(r'\b(typedef)\s+([a-zA-Z_]\w+)', flags=re.ASCII)
+
#
# Detects a reference to a documentation page of the form Documentation/... with
# an optional extension
#
-RE_doc = re.compile(r'Documentation(/[\w\-_/]+)(\.\w+)*')
+RE_doc = re.compile(r'\bDocumentation(/[\w\-_/]+)(\.\w+)*')
+
+#
+# Reserved C words that we should skip when cross-referencing
+#
+Skipnames = [ 'for', 'if', 'register', 'sizeof', 'struct', 'unsigned' ]
+
#
# Many places in the docs refer to common system calls. It is
@@ -48,9 +69,22 @@ def markup_refs(docname, app, node):
#
# Associate each regex with the function that will markup its matches
#
- markup_func = {RE_type: markup_c_ref,
- RE_function: markup_c_ref,
- RE_doc: markup_doc_ref}
+ markup_func_sphinx2 = {RE_doc: markup_doc_ref,
+ RE_function: markup_c_ref,
+ RE_generic_type: markup_c_ref}
+
+ markup_func_sphinx3 = {RE_doc: markup_doc_ref,
+ RE_function: markup_func_ref_sphinx3,
+ RE_struct: markup_c_ref,
+ RE_union: markup_c_ref,
+ RE_enum: markup_c_ref,
+ RE_typedef: markup_c_ref}
+
+ if sphinx.version_info[0] >= 3:
+ markup_func = markup_func_sphinx3
+ else:
+ markup_func = markup_func_sphinx2
+
match_iterators = [regex.finditer(t) for regex in markup_func]
#
# Sort all references by the starting position in text
@@ -75,12 +109,63 @@ def markup_refs(docname, app, node):
return repl
#
-# Try to replace a C reference (function() or struct/union/enum/typedef
-# type_name) with an appropriate cross reference.
+# In sphinx3 we can cross-reference to C macro and function, each one with its
+# own C role, but both match the same regex, so we try both.
#
+def markup_func_ref_sphinx3(docname, app, match):
+ class_str = ['c-func', 'c-macro']
+ reftype_str = ['function', 'macro']
+
+ cdom = app.env.domains['c']
+ #
+ # Go through the dance of getting an xref out of the C domain
+ #
+ target = match.group(2)
+ target_text = nodes.Text(match.group(0))
+ xref = None
+ if not (target in Skipfuncs or target in Skipnames):
+ for class_s, reftype_s in zip(class_str, reftype_str):
+ lit_text = nodes.literal(classes=['xref', 'c', class_s])
+ lit_text += target_text
+ pxref = addnodes.pending_xref('', refdomain = 'c',
+ reftype = reftype_s,
+ reftarget = target, modname = None,
+ classname = None)
+ #
+ # XXX The Latex builder will throw NoUri exceptions here,
+ # work around that by ignoring them.
+ #
+ try:
+ xref = cdom.resolve_xref(app.env, docname, app.builder,
+ reftype_s, target, pxref,
+ lit_text)
+ except NoUri:
+ xref = None
+
+ if xref:
+ return xref
+
+ return target_text
+
def markup_c_ref(docname, app, match):
- class_str = {RE_function: 'c-func', RE_type: 'c-type'}
- reftype_str = {RE_function: 'function', RE_type: 'type'}
+ class_str = {# Sphinx 2 only
+ RE_function: 'c-func',
+ RE_generic_type: 'c-type',
+ # Sphinx 3+ only
+ RE_struct: 'c-struct',
+ RE_union: 'c-union',
+ RE_enum: 'c-enum',
+ RE_typedef: 'c-type',
+ }
+ reftype_str = {# Sphinx 2 only
+ RE_function: 'function',
+ RE_generic_type: 'type',
+ # Sphinx 3+ only
+ RE_struct: 'struct',
+ RE_union: 'union',
+ RE_enum: 'enum',
+ RE_typedef: 'type',
+ }
cdom = app.env.domains['c']
#
@@ -89,7 +174,8 @@ def markup_c_ref(docname, app, match):
target = match.group(2)
target_text = nodes.Text(match.group(0))
xref = None
- if not (match.re == RE_function and target in Skipfuncs):
+ if not ((match.re == RE_function and target in Skipfuncs)
+ or (target in Skipnames)):
lit_text = nodes.literal(classes=['xref', 'c', class_str[match.re]])
lit_text += target_text
pxref = addnodes.pending_xref('', refdomain = 'c',
diff --git a/Documentation/sphinx/cdomain.py b/Documentation/sphinx/cdomain.py
index cbac8e608dc4..014a5229e57a 100644
--- a/Documentation/sphinx/cdomain.py
+++ b/Documentation/sphinx/cdomain.py
@@ -40,14 +40,94 @@ from sphinx import addnodes
from sphinx.domains.c import c_funcptr_sig_re, c_sig_re
from sphinx.domains.c import CObject as Base_CObject
from sphinx.domains.c import CDomain as Base_CDomain
+from itertools import chain
+import re
-__version__ = '1.0'
+__version__ = '1.1'
# Get Sphinx version
major, minor, patch = sphinx.version_info[:3]
+# Namespace to be prepended to the full name
+namespace = None
+
+#
+# Handle trivial newer c domain tags that are part of Sphinx 3.1 c domain tags
+# - Store the namespace if ".. c:namespace::" tag is found
+#
+RE_namespace = re.compile(r'^\s*..\s*c:namespace::\s*(\S+)\s*$')
+
+def markup_namespace(match):
+ global namespace
+
+ namespace = match.group(1)
+
+ return ""
+
+#
+# Handle c:macro for function-style declaration
+#
+RE_macro = re.compile(r'^\s*..\s*c:macro::\s*(\S+)\s+(\S.*)\s*$')
+def markup_macro(match):
+ return ".. c:function:: " + match.group(1) + ' ' + match.group(2)
+
+#
+# Handle newer c domain tags that are evaluated as .. c:type: for
+# backward-compatibility with Sphinx < 3.0
+#
+RE_ctype = re.compile(r'^\s*..\s*c:(struct|union|enum|enumerator|alias)::\s*(.*)$')
+
+def markup_ctype(match):
+ return ".. c:type:: " + match.group(2)
+
+#
+# Handle newer c domain tags that are evaluated as :c:type: for
+# backward-compatibility with Sphinx < 3.0
+#
+RE_ctype_refs = re.compile(r':c:(var|struct|union|enum|enumerator)::`([^\`]+)`')
+def markup_ctype_refs(match):
+ return ":c:type:`" + match.group(2) + '`'
+
+#
+# Simply convert :c:expr: and :c:texpr: into a literal block.
+#
+RE_expr = re.compile(r':c:(expr|texpr):`([^\`]+)`')
+def markup_c_expr(match):
+ return '\ ``' + match.group(2) + '``\ '
+
+#
+# Parse Sphinx 3.x C markups, replacing them by backward-compatible ones
+#
+def c_markups(app, docname, source):
+ result = ""
+ markup_func = {
+ RE_namespace: markup_namespace,
+ RE_expr: markup_c_expr,
+ RE_macro: markup_macro,
+ RE_ctype: markup_ctype,
+ RE_ctype_refs: markup_ctype_refs,
+ }
+
+ lines = iter(source[0].splitlines(True))
+ for n in lines:
+ match_iterators = [regex.finditer(n) for regex in markup_func]
+ matches = sorted(chain(*match_iterators), key=lambda m: m.start())
+ for m in matches:
+ n = n[:m.start()] + markup_func[m.re](m) + n[m.end():]
+
+ result = result + n
+
+ source[0] = result
+
+#
+# Now implements support for the cdomain namespacing logic
+#
+
def setup(app):
+ # Handle easy Sphinx 3.1+ simple new tags: :c:expr and .. c:namespace::
+ app.connect('source-read', c_markups)
+
if (major == 1 and minor < 8):
app.override_domain(CDomain)
else:
@@ -75,6 +155,8 @@ class CObject(Base_CObject):
function-like macro, the name of the macro is returned. Otherwise
``False`` is returned. """
+ global namespace
+
if not self.objtype == 'function':
return False
@@ -107,11 +189,16 @@ class CObject(Base_CObject):
param += nodes.emphasis(argname, argname)
paramlist += param
+ if namespace:
+ fullname = namespace + "." + fullname
+
return fullname
def handle_signature(self, sig, signode):
"""Transform a C signature into RST nodes."""
+ global namespace
+
fullname = self.handle_func_like_macro(sig, signode)
if not fullname:
fullname = super(CObject, self).handle_signature(sig, signode)
@@ -122,6 +209,10 @@ class CObject(Base_CObject):
else:
# FIXME: handle :name: value of other declaration types?
pass
+ else:
+ if namespace:
+ fullname = namespace + "." + fullname
+
return fullname
def add_target_and_index(self, name, sig, signode):
diff --git a/Documentation/sphinx/kerneldoc.py b/Documentation/sphinx/kerneldoc.py
index 4bcbd6ae01cd..e9857ab904f1 100644
--- a/Documentation/sphinx/kerneldoc.py
+++ b/Documentation/sphinx/kerneldoc.py
@@ -62,6 +62,7 @@ class KernelDocDirective(Directive):
'export': directives.unchanged,
'internal': directives.unchanged,
'identifiers': directives.unchanged,
+ 'no-identifiers': directives.unchanged,
'functions': directives.unchanged,
}
has_content = False
@@ -70,6 +71,11 @@ class KernelDocDirective(Directive):
env = self.state.document.settings.env
cmd = [env.config.kerneldoc_bin, '-rst', '-enable-lineno']
+ # Pass the version string to kernel-doc, as it needs to use a different
+ # dialect, depending what the C domain supports for each specific
+ # Sphinx versions
+ cmd += ['-sphinx-version', sphinx.__version__]
+
filename = env.config.kerneldoc_srctree + '/' + self.arguments[0]
export_file_patterns = []
@@ -99,6 +105,12 @@ class KernelDocDirective(Directive):
else:
cmd += ['-no-doc-sections']
+ if 'no-identifiers' in self.options:
+ no_identifiers = self.options.get('no-identifiers').split()
+ if no_identifiers:
+ for i in no_identifiers:
+ cmd += ['-nosymbol', i]
+
for pattern in export_file_patterns:
for f in glob.glob(env.config.kerneldoc_srctree + '/' + pattern):
env.note_dependency(os.path.abspath(f))
@@ -136,7 +148,8 @@ class KernelDocDirective(Directive):
lineoffset = int(match.group(1)) - 1
# we must eat our comments since the upset the markup
else:
- result.append(line, filename, lineoffset)
+ doc = env.srcdir + "/" + env.docname + ":" + str(self.lineno)
+ result.append(line, doc + ": " + filename, lineoffset)
lineoffset += 1
node = nodes.section()
diff --git a/Documentation/sphinx/parse-headers.pl b/Documentation/sphinx/parse-headers.pl
index 00a69aceff44..1910079f984f 100755
--- a/Documentation/sphinx/parse-headers.pl
+++ b/Documentation/sphinx/parse-headers.pl
@@ -110,7 +110,7 @@ while (<IN>) {
) {
my $s = $1;
- $structs{$s} = "struct :c:type:`$s`\\ ";
+ $structs{$s} = "struct $s\\ ";
next;
}
}
diff --git a/Documentation/trace/ftrace-uses.rst b/Documentation/trace/ftrace-uses.rst
index 2a05e770618a..a4955f7e3d19 100644
--- a/Documentation/trace/ftrace-uses.rst
+++ b/Documentation/trace/ftrace-uses.rst
@@ -55,17 +55,17 @@ an ftrace_ops with ftrace:
Both .flags and .private are optional. Only .func is required.
-To enable tracing call:
+To enable tracing call::
-.. c:function:: register_ftrace_function(&ops);
+ register_ftrace_function(&ops);
-To disable tracing call:
+To disable tracing call::
-.. c:function:: unregister_ftrace_function(&ops);
+ unregister_ftrace_function(&ops);
-The above is defined by including the header:
+The above is defined by including the header::
-.. c:function:: #include <linux/ftrace.h>
+ #include <linux/ftrace.h>
The registered callback will start being called some time after the
register_ftrace_function() is called and before it returns. The exact time
diff --git a/Documentation/translations/it_IT/kernel-hacking/hacking.rst b/Documentation/translations/it_IT/kernel-hacking/hacking.rst
index 6aab27a8d323..3d30b69f1ec1 100644
--- a/Documentation/translations/it_IT/kernel-hacking/hacking.rst
+++ b/Documentation/translations/it_IT/kernel-hacking/hacking.rst
@@ -402,7 +402,7 @@ il valore convertito. Tutte le varianti supportano anche il processo inverso:
:c:func:`be32_to_cpu()`, eccetera.
Queste funzioni hanno principalmente due varianti: la variante per
-puntatori, come :c:func:`cpu_to_be32p(), che prende un puntatore
+puntatori, come :c:func:`cpu_to_be32p()`, che prende un puntatore
ad un tipo, e ritorna il valore convertito. L'altra variante per
la famiglia di conversioni "in-situ", come :c:func:`cpu_to_be32s()`,
che convertono il valore puntato da un puntatore, e ritornano void.
diff --git a/Documentation/translations/it_IT/kernel-hacking/locking.rst b/Documentation/translations/it_IT/kernel-hacking/locking.rst
index 4615df5723fb..bf1acd6204ef 100644
--- a/Documentation/translations/it_IT/kernel-hacking/locking.rst
+++ b/Documentation/translations/it_IT/kernel-hacking/locking.rst
@@ -1,5 +1,7 @@
.. include:: ../disclaimer-ita.rst
+.. c:namespace:: it_IT
+
:Original: :ref:`Documentation/kernel-hacking/locking.rst <kernel_hacking_lock>`
:Translator: Federico Vaga <federico.vaga@vaga.pv.it>
diff --git a/Documentation/translations/zh_CN/arm64/amu.rst b/Documentation/translations/zh_CN/arm64/amu.rst
index bd875f221330..ab7180f91394 100644
--- a/Documentation/translations/zh_CN/arm64/amu.rst
+++ b/Documentation/translations/zh_CN/arm64/amu.rst
@@ -4,9 +4,9 @@
Translator: Bailu Lin <bailu.lin@vivo.com>
-=================================
+==================================
AArch64 Linux 中扩展的活动监控单元
-=================================
+==================================
作者: Ionela Voinescu <ionela.voinescu@arm.com>
diff --git a/Documentation/userspace-api/media/cec/cec-func-close.rst b/Documentation/userspace-api/media/cec/cec-func-close.rst
index 33c563f414a8..409e70a5f80f 100644
--- a/Documentation/userspace-api/media/cec/cec-func-close.rst
+++ b/Documentation/userspace-api/media/cec/cec-func-close.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: CEC
.. _cec-func-close:
@@ -11,7 +12,6 @@ Name
cec-close - Close a cec device
-
Synopsis
========
@@ -19,16 +19,13 @@ Synopsis
#include <unistd.h>
-
.. c:function:: int close( int fd )
- :name: cec-close
Arguments
=========
``fd``
- File descriptor returned by :c:func:`open() <cec-open>`.
-
+ File descriptor returned by :c:func:`open()`.
Description
===========
@@ -36,11 +33,10 @@ Description
Closes the cec device. Resources associated with the file descriptor are
freed. The device configuration remain unchanged.
-
Return Value
============
-:c:func:`close() <cec-close>` returns 0 on success. On error, -1 is returned, and
+:c:func:`close()` returns 0 on success. On error, -1 is returned, and
``errno`` is set appropriately. Possible error codes are:
``EBADF``
diff --git a/Documentation/userspace-api/media/cec/cec-func-ioctl.rst b/Documentation/userspace-api/media/cec/cec-func-ioctl.rst
index 3b88230fad80..7c93f86de6cc 100644
--- a/Documentation/userspace-api/media/cec/cec-func-ioctl.rst
+++ b/Documentation/userspace-api/media/cec/cec-func-ioctl.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: CEC
.. _cec-func-ioctl:
@@ -18,15 +19,13 @@ Synopsis
#include <sys/ioctl.h>
-
-.. c:function:: int ioctl( int fd, int request, void *argp )
- :name: cec-ioctl
+``int ioctl(int fd, int request, void *argp)``
Arguments
=========
``fd``
- File descriptor returned by :c:func:`open() <cec-open>`.
+ File descriptor returned by :c:func:`open()`.
``request``
CEC ioctl request code as defined in the cec.h header file, for
@@ -35,11 +34,10 @@ Arguments
``argp``
Pointer to a request-specific structure.
-
Description
===========
-The :c:func:`ioctl() <cec-ioctl>` function manipulates cec device parameters. The
+The :c:func:`ioctl()` function manipulates cec device parameters. The
argument ``fd`` must be an open file descriptor.
The ioctl ``request`` code specifies the cec function to be called. It
@@ -51,7 +49,6 @@ their parameters are located in the cec.h header file. All cec ioctl
requests, their respective function and parameters are specified in
:ref:`cec-user-func`.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/cec/cec-func-open.rst b/Documentation/userspace-api/media/cec/cec-func-open.rst
index 887bfd2a755e..d86563a34b9e 100644
--- a/Documentation/userspace-api/media/cec/cec-func-open.rst
+++ b/Documentation/userspace-api/media/cec/cec-func-open.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: CEC
.. _cec-func-open:
@@ -18,10 +19,7 @@ Synopsis
#include <fcntl.h>
-
.. c:function:: int open( const char *device_name, int flags )
- :name: cec-open
-
Arguments
=========
@@ -42,11 +40,10 @@ Arguments
Other flags have no effect.
-
Description
===========
-To open a cec device applications call :c:func:`open() <cec-open>` with the
+To open a cec device applications call :c:func:`open()` with the
desired device name. The function has no side effects; the device
configuration remain unchanged.
@@ -54,11 +51,10 @@ When the device is opened in read-only mode, attempts to modify its
configuration will result in an error, and ``errno`` will be set to
EBADF.
-
Return Value
============
-:c:func:`open() <cec-open>` returns the new file descriptor on success. On error,
+:c:func:`open()` returns the new file descriptor on success. On error,
-1 is returned, and ``errno`` is set appropriately. Possible error codes
include:
diff --git a/Documentation/userspace-api/media/cec/cec-func-poll.rst b/Documentation/userspace-api/media/cec/cec-func-poll.rst
index 2d87136e9a3f..980bbfc0bcce 100644
--- a/Documentation/userspace-api/media/cec/cec-func-poll.rst
+++ b/Documentation/userspace-api/media/cec/cec-func-poll.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: CEC
.. _cec-func-poll:
@@ -11,7 +12,6 @@ Name
cec-poll - Wait for some event on a file descriptor
-
Synopsis
========
@@ -19,9 +19,7 @@ Synopsis
#include <sys/poll.h>
-
.. c:function:: int poll( struct pollfd *ufds, unsigned int nfds, int timeout )
- :name: cec-poll
Arguments
=========
@@ -35,14 +33,13 @@ Arguments
``timeout``
Timeout to wait for events
-
Description
===========
-With the :c:func:`poll() <cec-poll>` function applications can wait for CEC
+With the :c:func:`poll()` function applications can wait for CEC
events.
-On success :c:func:`poll() <cec-poll>` returns the number of file descriptors
+On success :c:func:`poll()` returns the number of file descriptors
that have been selected (that is, file descriptors for which the
``revents`` field of the respective struct :c:type:`pollfd`
is non-zero). CEC devices set the ``POLLIN`` and ``POLLRDNORM`` flags in
@@ -53,13 +50,12 @@ then the ``POLLPRI`` flag is set. When the function times out it returns
a value of zero, on failure it returns -1 and the ``errno`` variable is
set appropriately.
-For more details see the :c:func:`poll() <cec-poll>` manual page.
-
+For more details see the :c:func:`poll()` manual page.
Return Value
============
-On success, :c:func:`poll() <cec-poll>` returns the number structures which have
+On success, :c:func:`poll()` returns the number structures which have
non-zero ``revents`` fields, or zero if the call timed out. On error -1
is returned, and the ``errno`` variable is set appropriately:
diff --git a/Documentation/userspace-api/media/cec/cec-ioc-adap-g-caps.rst b/Documentation/userspace-api/media/cec/cec-ioc-adap-g-caps.rst
index 7f25365ce0fb..c7309a2fcbce 100644
--- a/Documentation/userspace-api/media/cec/cec-ioc-adap-g-caps.rst
+++ b/Documentation/userspace-api/media/cec/cec-ioc-adap-g-caps.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: CEC
.. _CEC_ADAP_G_CAPS:
@@ -14,18 +15,18 @@ CEC_ADAP_G_CAPS - Query device capabilities
Synopsis
========
-.. c:function:: int ioctl( int fd, CEC_ADAP_G_CAPS, struct cec_caps *argp )
- :name: CEC_ADAP_G_CAPS
+.. c:macro:: CEC_ADAP_G_CAPS
+
+``int ioctl(int fd, CEC_ADAP_G_CAPS, struct cec_caps *argp)``
Arguments
=========
``fd``
- File descriptor returned by :c:func:`open() <cec-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
-
Description
===========
@@ -62,7 +63,6 @@ returns the information to the application. The ioctl never fails.
- CEC Framework API version, formatted with the ``KERNEL_VERSION()``
macro.
-
.. tabularcolumns:: |p{4.4cm}|p{2.5cm}|p{10.6cm}|
.. _cec-capabilities:
diff --git a/Documentation/userspace-api/media/cec/cec-ioc-adap-g-conn-info.rst b/Documentation/userspace-api/media/cec/cec-ioc-adap-g-conn-info.rst
index 6818ddf1495c..13116b0b5c17 100644
--- a/Documentation/userspace-api/media/cec/cec-ioc-adap-g-conn-info.rst
+++ b/Documentation/userspace-api/media/cec/cec-ioc-adap-g-conn-info.rst
@@ -2,6 +2,8 @@
..
.. Copyright 2019 Google LLC
..
+.. c:namespace:: CEC
+
.. _CEC_ADAP_G_CONNECTOR_INFO:
*******************************
@@ -16,18 +18,18 @@ CEC_ADAP_G_CONNECTOR_INFO - Query HDMI connector information
Synopsis
========
-.. c:function:: int ioctl( int fd, CEC_ADAP_G_CONNECTOR_INFO, struct cec_connector_info *argp )
- :name: CEC_ADAP_G_CONNECTOR_INFO
+.. c:macro:: CEC_ADAP_G_CONNECTOR_INFO
+
+``int ioctl(int fd, CEC_ADAP_G_CONNECTOR_INFO, struct cec_connector_info *argp)``
Arguments
=========
``fd``
- File descriptor returned by :c:func:`open() <cec-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
-
Description
===========
@@ -57,7 +59,6 @@ is only available if the ``CEC_CAP_CONNECTOR_INFO`` capability is set.
* - }
-
-
.. tabularcolumns:: |p{4.4cm}|p{2.5cm}|p{10.6cm}|
.. _connector-type:
diff --git a/Documentation/userspace-api/media/cec/cec-ioc-adap-g-log-addrs.rst b/Documentation/userspace-api/media/cec/cec-ioc-adap-g-log-addrs.rst
index 1ca893270ae9..c760c07b6b3f 100644
--- a/Documentation/userspace-api/media/cec/cec-ioc-adap-g-log-addrs.rst
+++ b/Documentation/userspace-api/media/cec/cec-ioc-adap-g-log-addrs.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: CEC
.. _CEC_ADAP_LOG_ADDRS:
.. _CEC_ADAP_G_LOG_ADDRS:
@@ -13,21 +14,22 @@ Name
CEC_ADAP_G_LOG_ADDRS, CEC_ADAP_S_LOG_ADDRS - Get or set the logical addresses
-
Synopsis
========
-.. c:function:: int ioctl( int fd, CEC_ADAP_G_LOG_ADDRS, struct cec_log_addrs *argp )
- :name: CEC_ADAP_G_LOG_ADDRS
+.. c:macro:: CEC_ADAP_G_LOG_ADDRS
+
+``int ioctl(int fd, CEC_ADAP_G_LOG_ADDRS, struct cec_log_addrs *argp)``
+
+.. c:macro:: CEC_ADAP_S_LOG_ADDRS
-.. c:function:: int ioctl( int fd, CEC_ADAP_S_LOG_ADDRS, struct cec_log_addrs *argp )
- :name: CEC_ADAP_S_LOG_ADDRS
+``int ioctl(int fd, CEC_ADAP_S_LOG_ADDRS, struct cec_log_addrs *argp)``
Arguments
=========
``fd``
- File descriptor returned by :c:func:`open() <cec-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`cec_log_addrs`.
@@ -148,7 +150,6 @@ logical address types are already defined will return with error ``EBUSY``.
give the CEC framework more information about the device type, even
though the framework won't use it directly in the CEC message.
-
.. tabularcolumns:: |p{7.8cm}|p{1.0cm}|p{8.7cm}|
.. _cec-log-addrs-flags:
@@ -185,7 +186,6 @@ logical address types are already defined will return with error ``EBUSY``.
All other messages are ignored.
-
.. tabularcolumns:: |p{7.8cm}|p{1.0cm}|p{8.7cm}|
.. _cec-versions:
@@ -211,7 +211,6 @@ logical address types are already defined will return with error ``EBUSY``.
- 6
- CEC version according to the HDMI 2.0 standard.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _cec-prim-dev-types:
@@ -257,7 +256,6 @@ logical address types are already defined will return with error ``EBUSY``.
- 7
- Use for a video processor device.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _cec-log-addr-types:
@@ -306,7 +304,6 @@ logical address types are already defined will return with error ``EBUSY``.
Control).
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _cec-all-dev-types-flags:
@@ -348,7 +345,6 @@ logical address types are already defined will return with error ``EBUSY``.
- This supports the CEC Switch or Video Processing type.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/cec/cec-ioc-adap-g-phys-addr.rst b/Documentation/userspace-api/media/cec/cec-ioc-adap-g-phys-addr.rst
index a10443be1b26..fb22f6894f26 100644
--- a/Documentation/userspace-api/media/cec/cec-ioc-adap-g-phys-addr.rst
+++ b/Documentation/userspace-api/media/cec/cec-ioc-adap-g-phys-addr.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: CEC
.. _CEC_ADAP_PHYS_ADDR:
.. _CEC_ADAP_G_PHYS_ADDR:
@@ -13,21 +14,22 @@ Name
CEC_ADAP_G_PHYS_ADDR, CEC_ADAP_S_PHYS_ADDR - Get or set the physical address
-
Synopsis
========
-.. c:function:: int ioctl( int fd, CEC_ADAP_G_PHYS_ADDR, __u16 *argp )
- :name: CEC_ADAP_G_PHYS_ADDR
+.. c:macro:: CEC_ADAP_G_PHYS_ADDR
+
+``int ioctl(int fd, CEC_ADAP_G_PHYS_ADDR, __u16 *argp)``
-.. c:function:: int ioctl( int fd, CEC_ADAP_S_PHYS_ADDR, __u16 *argp )
- :name: CEC_ADAP_S_PHYS_ADDR
+.. c:macro:: CEC_ADAP_S_PHYS_ADDR
+
+``int ioctl(int fd, CEC_ADAP_S_PHYS_ADDR, __u16 *argp)``
Arguments
=========
``fd``
- File descriptor returned by :c:func:`open() <cec-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to the CEC address.
@@ -71,7 +73,6 @@ For example, the EDID for each HDMI input of the TV will have a
different physical address of the form a.0.0.0 that the sources will
read out and use as their physical address.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/cec/cec-ioc-dqevent.rst b/Documentation/userspace-api/media/cec/cec-ioc-dqevent.rst
index 3bc81fc5a73f..736fda5ad73d 100644
--- a/Documentation/userspace-api/media/cec/cec-ioc-dqevent.rst
+++ b/Documentation/userspace-api/media/cec/cec-ioc-dqevent.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: CEC
.. _CEC_DQEVENT:
@@ -11,22 +12,21 @@ Name
CEC_DQEVENT - Dequeue a CEC event
-
Synopsis
========
-.. c:function:: int ioctl( int fd, CEC_DQEVENT, struct cec_event *argp )
- :name: CEC_DQEVENT
+.. c:macro:: CEC_DQEVENT
+
+``int ioctl(int fd, CEC_DQEVENT, struct cec_event *argp)``
Arguments
=========
``fd``
- File descriptor returned by :c:func:`open() <cec-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
-
Description
===========
@@ -72,7 +72,6 @@ it is guaranteed that the state did change in between the two events.
the HDMI driver is still configuring the device or because the HDMI
device was unbound.
-
.. c:type:: cec_event_lost_msgs
.. tabularcolumns:: |p{1.0cm}|p{2.0cm}|p{14.5cm}|
@@ -94,7 +93,6 @@ it is guaranteed that the state did change in between the two events.
replied to within a second according to the CEC specification,
this is more than enough.
-
.. tabularcolumns:: |p{1.0cm}|p{4.4cm}|p{2.5cm}|p{9.6cm}|
.. c:type:: cec_event
@@ -130,7 +128,6 @@ it is guaranteed that the state did change in between the two events.
* - }
-
-
.. tabularcolumns:: |p{5.6cm}|p{0.9cm}|p{11.0cm}|
.. _cec-events:
@@ -204,7 +201,6 @@ it is guaranteed that the state did change in between the two events.
if the 5V is high, then an initial event will be generated for that
filehandle.
-
.. tabularcolumns:: |p{6.0cm}|p{0.6cm}|p{10.9cm}|
.. _cec-event-flags:
@@ -230,7 +226,6 @@ it is guaranteed that the state did change in between the two events.
This is an indication that the application cannot keep up.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/cec/cec-ioc-g-mode.rst b/Documentation/userspace-api/media/cec/cec-ioc-g-mode.rst
index 2093e373c93c..d3387b1fa7c5 100644
--- a/Documentation/userspace-api/media/cec/cec-ioc-g-mode.rst
+++ b/Documentation/userspace-api/media/cec/cec-ioc-g-mode.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: CEC
.. _CEC_MODE:
.. _CEC_G_MODE:
@@ -13,17 +14,19 @@ CEC_G_MODE, CEC_S_MODE - Get or set exclusive use of the CEC adapter
Synopsis
========
-.. c:function:: int ioctl( int fd, CEC_G_MODE, __u32 *argp )
- :name: CEC_G_MODE
+.. c:macro:: CEC_G_MODE
-.. c:function:: int ioctl( int fd, CEC_S_MODE, __u32 *argp )
- :name: CEC_S_MODE
+``int ioctl(int fd, CEC_G_MODE, __u32 *argp)``
+
+.. c:macro:: CEC_S_MODE
+
+``int ioctl(int fd, CEC_S_MODE, __u32 *argp)``
Arguments
=========
``fd``
- File descriptor returned by :c:func:`open() <cec-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to CEC mode.
@@ -101,7 +104,6 @@ Available initiator modes are:
then an attempt to become one will return the ``EBUSY`` error code
error.
-
Available follower modes are:
.. tabularcolumns:: |p{6.6cm}|p{0.9cm}|p{10.0cm}|
@@ -193,7 +195,6 @@ Available follower modes are:
the process has the ``CAP_NET_ADMIN`` capability. If that is not
set, then the ``EPERM`` error code is returned.
-
Core message processing details:
.. tabularcolumns:: |p{6.6cm}|p{10.9cm}|
@@ -272,7 +273,6 @@ Core message processing details:
and then just pass the message on to the follower(s).
-
Return Value
============
diff --git a/Documentation/userspace-api/media/cec/cec-ioc-receive.rst b/Documentation/userspace-api/media/cec/cec-ioc-receive.rst
index 9d629d46973c..b2fc051e99f4 100644
--- a/Documentation/userspace-api/media/cec/cec-ioc-receive.rst
+++ b/Documentation/userspace-api/media/cec/cec-ioc-receive.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: CEC
.. _CEC_TRANSMIT:
.. _CEC_RECEIVE:
@@ -12,21 +13,22 @@ Name
CEC_RECEIVE, CEC_TRANSMIT - Receive or transmit a CEC message
-
Synopsis
========
-.. c:function:: int ioctl( int fd, CEC_RECEIVE, struct cec_msg \*argp )
- :name: CEC_RECEIVE
+.. c:macro:: CEC_RECEIVE
+
+``int ioctl(int fd, CEC_RECEIVE, struct cec_msg *argp)``
-.. c:function:: int ioctl( int fd, CEC_TRANSMIT, struct cec_msg \*argp )
- :name: CEC_TRANSMIT
+.. c:macro:: CEC_TRANSMIT
+
+``int ioctl(int fd, CEC_TRANSMIT, struct cec_msg *argp)``
Arguments
=========
``fd``
- File descriptor returned by :c:func:`open() <cec-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct cec_msg.
@@ -194,7 +196,6 @@ View On' messages from initiator 0xf ('Unregistered') to destination 0 ('TV').
supports this, otherwise it is always 0. This counter is only
valid if the :ref:`CEC_TX_STATUS_ERROR <CEC-TX-STATUS-ERROR>` status bit is set.
-
.. tabularcolumns:: |p{6.2cm}|p{1.0cm}|p{10.3cm}|
.. _cec-msg-flags:
@@ -228,7 +229,6 @@ View On' messages from initiator 0xf ('Unregistered') to destination 0 ('TV').
capability. If that is not set, then the ``EPERM`` error code is
returned.
-
.. tabularcolumns:: |p{5.6cm}|p{0.9cm}|p{11.0cm}|
.. _cec-tx-status:
@@ -298,7 +298,6 @@ View On' messages from initiator 0xf ('Unregistered') to destination 0 ('TV').
- The transmit timed out. This should not normally happen and this
indicates a driver problem.
-
.. tabularcolumns:: |p{5.6cm}|p{0.9cm}|p{11.0cm}|
.. _cec-rx-status:
@@ -335,7 +334,6 @@ View On' messages from initiator 0xf ('Unregistered') to destination 0 ('TV').
reply was interrupted.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/dvb/audio-bilingual-channel-select.rst b/Documentation/userspace-api/media/dvb/audio-bilingual-channel-select.rst
index ba4f48b30d29..33b5363317f1 100644
--- a/Documentation/userspace-api/media/dvb/audio-bilingual-channel-select.rst
+++ b/Documentation/userspace-api/media/dvb/audio-bilingual-channel-select.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.audio
.. _AUDIO_BILINGUAL_CHANNEL_SELECT:
@@ -16,9 +17,9 @@ AUDIO_BILINGUAL_CHANNEL_SELECT
Synopsis
--------
-.. c:function:: int ioctl(int fd, AUDIO_BILINGUAL_CHANNEL_SELECT, struct *audio_channel_select)
- :name: AUDIO_BILINGUAL_CHANNEL_SELECT
+.. c:macro:: AUDIO_BILINGUAL_CHANNEL_SELECT
+``int ioctl(int fd, AUDIO_BILINGUAL_CHANNEL_SELECT, struct audio_channel_select *select)``
Arguments
---------
@@ -39,7 +40,6 @@ Arguments
- Select the output format of the audio (mono left/right, stereo).
-
Description
-----------
@@ -50,7 +50,6 @@ for MPEG decoders controlled through V4L2.
This ioctl call asks the Audio Device to select the requested channel
for bilingual streams if possible.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/audio-channel-select.rst b/Documentation/userspace-api/media/dvb/audio-channel-select.rst
index ba83b639d955..74093df92a68 100644
--- a/Documentation/userspace-api/media/dvb/audio-channel-select.rst
+++ b/Documentation/userspace-api/media/dvb/audio-channel-select.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.audio
.. _AUDIO_CHANNEL_SELECT:
@@ -16,9 +17,9 @@ AUDIO_CHANNEL_SELECT
Synopsis
--------
-.. c:function:: int ioctl(int fd, AUDIO_CHANNEL_SELECT, struct *audio_channel_select)
- :name: AUDIO_CHANNEL_SELECT
+.. c:macro:: AUDIO_CHANNEL_SELECT
+``int ioctl(int fd, AUDIO_CHANNEL_SELECT, struct audio_channel_select *select)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
-
- int fd
@@ -40,7 +40,6 @@ Arguments
- Select the output format of the audio (mono left/right, stereo).
-
Description
-----------
@@ -50,7 +49,6 @@ V4L2 ``V4L2_CID_MPEG_AUDIO_DEC_PLAYBACK`` control instead.
This ioctl call asks the Audio Device to select the requested channel if
possible.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/audio-clear-buffer.rst b/Documentation/userspace-api/media/dvb/audio-clear-buffer.rst
index 7035a40c0e3a..a0ebb0278260 100644
--- a/Documentation/userspace-api/media/dvb/audio-clear-buffer.rst
+++ b/Documentation/userspace-api/media/dvb/audio-clear-buffer.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.audio
.. _AUDIO_CLEAR_BUFFER:
@@ -16,8 +17,9 @@ AUDIO_CLEAR_BUFFER
Synopsis
--------
-.. c:function:: int ioctl(int fd, AUDIO_CLEAR_BUFFER)
- :name: AUDIO_CLEAR_BUFFER
+.. c:macro:: AUDIO_CLEAR_BUFFER
+
+``int ioctl(int fd, AUDIO_CLEAR_BUFFER)``
Arguments
---------
@@ -26,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -39,7 +40,6 @@ Description
This ioctl call asks the Audio Device to clear all software and hardware
buffers of the audio decoder device.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/audio-continue.rst b/Documentation/userspace-api/media/dvb/audio-continue.rst
index c8d514a4eeb0..a2e9850f37f2 100644
--- a/Documentation/userspace-api/media/dvb/audio-continue.rst
+++ b/Documentation/userspace-api/media/dvb/audio-continue.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.audio
.. _AUDIO_CONTINUE:
@@ -16,9 +17,9 @@ AUDIO_CONTINUE
Synopsis
--------
-.. c:function:: int ioctl(int fd, AUDIO_CONTINUE)
- :name: AUDIO_CONTINUE
+.. c:macro:: AUDIO_CONTINUE
+``int ioctl(int fd, AUDIO_CONTINUE)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -40,7 +40,6 @@ Description
This ioctl restarts the decoding and playing process previously paused
with AUDIO_PAUSE command.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/audio-fclose.rst b/Documentation/userspace-api/media/dvb/audio-fclose.rst
index c968177b1e3f..77857d578e83 100644
--- a/Documentation/userspace-api/media/dvb/audio-fclose.rst
+++ b/Documentation/userspace-api/media/dvb/audio-fclose.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.audio
.. _audio_fclose:
@@ -17,8 +18,6 @@ Synopsis
--------
.. c:function:: int close(int fd)
- :name: dvb-audio-close
-
Arguments
---------
@@ -27,20 +26,17 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
- File descriptor returned by a previous call to open().
-
Description
-----------
This system call closes a previously opened audio device.
-
Return Value
------------
@@ -48,7 +44,6 @@ Return Value
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- ``EBADF``
diff --git a/Documentation/userspace-api/media/dvb/audio-fopen.rst b/Documentation/userspace-api/media/dvb/audio-fopen.rst
index d34001e038dc..774daaab3bad 100644
--- a/Documentation/userspace-api/media/dvb/audio-fopen.rst
+++ b/Documentation/userspace-api/media/dvb/audio-fopen.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.audio
.. _audio_fopen:
@@ -17,8 +18,6 @@ Synopsis
--------
.. c:function:: int open(const char *deviceName, int flags)
- :name: dvb-audio-open
-
Arguments
---------
@@ -27,7 +26,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- const char \*deviceName
@@ -60,7 +58,6 @@ Arguments
-
- (blocking mode is the default)
-
Description
-----------
@@ -78,7 +75,6 @@ fail, and an error code will be returned. If the Audio Device is opened
in O_RDONLY mode, the only ioctl call that can be used is
AUDIO_GET_STATUS. All other call will return with an error code.
-
Return Value
------------
@@ -88,7 +84,6 @@ Return Value
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- ``ENODEV``
diff --git a/Documentation/userspace-api/media/dvb/audio-fwrite.rst b/Documentation/userspace-api/media/dvb/audio-fwrite.rst
index d17ec719e231..7b096ac2b6c4 100644
--- a/Documentation/userspace-api/media/dvb/audio-fwrite.rst
+++ b/Documentation/userspace-api/media/dvb/audio-fwrite.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.audio
.. _audio_fwrite:
@@ -17,8 +18,6 @@ Synopsis
--------
.. c:function:: size_t write(int fd, const void *buf, size_t count)
- :name: dvb-audio-write
-
Arguments
---------
@@ -27,7 +26,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -46,7 +44,6 @@ Arguments
- Size of buf.
-
Description
-----------
@@ -56,7 +53,6 @@ PES format. If O_NONBLOCK is not specified the function will block
until buffer space is available. The amount of data to be transferred is
implied by count.
-
Return Value
------------
@@ -64,7 +60,6 @@ Return Value
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- ``EPERM``
diff --git a/Documentation/userspace-api/media/dvb/audio-get-capabilities.rst b/Documentation/userspace-api/media/dvb/audio-get-capabilities.rst
index 33907e40e48c..6d9eb71dad17 100644
--- a/Documentation/userspace-api/media/dvb/audio-get-capabilities.rst
+++ b/Documentation/userspace-api/media/dvb/audio-get-capabilities.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.audio
.. _AUDIO_GET_CAPABILITIES:
@@ -16,9 +17,9 @@ AUDIO_GET_CAPABILITIES
Synopsis
--------
-.. c:function:: int ioctl(int fd, AUDIO_GET_CAPABILITIES, unsigned int *cap)
- :name: AUDIO_GET_CAPABILITIES
+.. c:macro:: AUDIO_GET_CAPABILITIES
+``int ioctl(int fd, AUDIO_GET_CAPABILITIES, unsigned int *cap)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
-
- int fd
@@ -40,14 +40,12 @@ Arguments
- Returns a bit array of supported sound formats.
-
Description
-----------
This ioctl call asks the Audio Device to tell us about the decoding
capabilities of the audio hardware.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/audio-get-status.rst b/Documentation/userspace-api/media/dvb/audio-get-status.rst
index 4213d076c6ed..7ae8db2e65e9 100644
--- a/Documentation/userspace-api/media/dvb/audio-get-status.rst
+++ b/Documentation/userspace-api/media/dvb/audio-get-status.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.audio
.. _AUDIO_GET_STATUS:
@@ -16,9 +17,9 @@ AUDIO_GET_STATUS
Synopsis
--------
-.. c:function:: int ioctl(int fd, AUDIO_GET_STATUS, struct audio_status *status)
- :name: AUDIO_GET_STATUS
+.. c:macro:: AUDIO_GET_STATUS
+``int ioctl(int fd, AUDIO_GET_STATUS, struct audio_status *status)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
-
- int fd
@@ -40,14 +40,12 @@ Arguments
- Returns the current state of Audio Device.
-
Description
-----------
This ioctl call asks the Audio Device to return the current state of the
Audio Device.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/audio-pause.rst b/Documentation/userspace-api/media/dvb/audio-pause.rst
index 2de74f1662cf..d37d1ddce4df 100644
--- a/Documentation/userspace-api/media/dvb/audio-pause.rst
+++ b/Documentation/userspace-api/media/dvb/audio-pause.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.audio
.. _AUDIO_PAUSE:
@@ -16,8 +17,9 @@ AUDIO_PAUSE
Synopsis
--------
-.. c:function:: int ioctl(int fd, AUDIO_PAUSE)
- :name: AUDIO_PAUSE
+.. c:macro:: AUDIO_PAUSE
+
+``int ioctl(int fd, AUDIO_PAUSE)``
Arguments
---------
@@ -26,14 +28,12 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
- File descriptor returned by a previous call to open().
-
Description
-----------
@@ -41,7 +41,6 @@ This ioctl call suspends the audio stream being played. Decoding and
playing are paused. It is then possible to restart again decoding and
playing process of the audio stream using AUDIO_CONTINUE command.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/audio-play.rst b/Documentation/userspace-api/media/dvb/audio-play.rst
index d4e4eacb8686..e591930b6ca7 100644
--- a/Documentation/userspace-api/media/dvb/audio-play.rst
+++ b/Documentation/userspace-api/media/dvb/audio-play.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.audio
.. _AUDIO_PLAY:
@@ -16,9 +17,9 @@ AUDIO_PLAY
Synopsis
--------
-.. c:function:: int ioctl(int fd, AUDIO_PLAY)
- :name: AUDIO_PLAY
+.. c:macro:: AUDIO_PLAY
+``int ioctl(int fd, AUDIO_PLAY)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -40,7 +40,6 @@ Description
This ioctl call asks the Audio Device to start playing an audio stream
from the selected source.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/audio-select-source.rst b/Documentation/userspace-api/media/dvb/audio-select-source.rst
index fb09f914cb8f..6a0c0f365eb1 100644
--- a/Documentation/userspace-api/media/dvb/audio-select-source.rst
+++ b/Documentation/userspace-api/media/dvb/audio-select-source.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.audio
.. _AUDIO_SELECT_SOURCE:
@@ -16,9 +17,9 @@ AUDIO_SELECT_SOURCE
Synopsis
--------
-.. c:function:: int ioctl(int fd, AUDIO_SELECT_SOURCE, struct audio_stream_source *source)
- :name: AUDIO_SELECT_SOURCE
+.. c:macro:: AUDIO_SELECT_SOURCE
+``int ioctl(int fd, AUDIO_SELECT_SOURCE, struct audio_stream_source *source)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
-
- int fd
@@ -40,7 +40,6 @@ Arguments
- Indicates the source that shall be used for the Audio stream.
-
Description
-----------
@@ -49,7 +48,6 @@ the input data. The possible sources are demux or memory. If
AUDIO_SOURCE_MEMORY is selected, the data is fed to the Audio Device
through the write command.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/audio-set-av-sync.rst b/Documentation/userspace-api/media/dvb/audio-set-av-sync.rst
index 5bcb9b1ed19d..85a8016bf025 100644
--- a/Documentation/userspace-api/media/dvb/audio-set-av-sync.rst
+++ b/Documentation/userspace-api/media/dvb/audio-set-av-sync.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.audio
.. _AUDIO_SET_AV_SYNC:
@@ -16,9 +17,9 @@ AUDIO_SET_AV_SYNC
Synopsis
--------
-.. c:function:: int ioctl(int fd, AUDIO_SET_AV_SYNC, boolean state)
- :name: AUDIO_SET_AV_SYNC
+.. c:macro:: AUDIO_SET_AV_SYNC
+``int ioctl(int fd, AUDIO_SET_AV_SYNC, boolean state)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
-
- int fd
@@ -44,14 +44,12 @@ Arguments
FALSE: AV-sync OFF
-
Description
-----------
This ioctl call asks the Audio Device to turn ON or OFF A/V
synchronization.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/audio-set-bypass-mode.rst b/Documentation/userspace-api/media/dvb/audio-set-bypass-mode.rst
index f24a18bbb567..ecac02f1b2fc 100644
--- a/Documentation/userspace-api/media/dvb/audio-set-bypass-mode.rst
+++ b/Documentation/userspace-api/media/dvb/audio-set-bypass-mode.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.audio
.. _AUDIO_SET_BYPASS_MODE:
@@ -16,8 +17,9 @@ AUDIO_SET_BYPASS_MODE
Synopsis
--------
-.. c:function:: int ioctl(int fd, AUDIO_SET_BYPASS_MODE, boolean mode)
- :name: AUDIO_SET_BYPASS_MODE
+.. c:macro:: AUDIO_SET_BYPASS_MODE
+
+``int ioctl(int fd, AUDIO_SET_BYPASS_MODE, boolean mode)``
Arguments
---------
@@ -26,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
-
- int fd
@@ -44,7 +45,6 @@ Arguments
FALSE: Bypass is enabled
-
Description
-----------
@@ -54,7 +54,6 @@ that can’t be handled by the Digital TV system shall be decoded. Dolby
DigitalTM streams are automatically forwarded by the Digital TV subsystem if
the hardware can handle it.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/audio-set-id.rst b/Documentation/userspace-api/media/dvb/audio-set-id.rst
index 0227e1071d0c..39ad846d412d 100644
--- a/Documentation/userspace-api/media/dvb/audio-set-id.rst
+++ b/Documentation/userspace-api/media/dvb/audio-set-id.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.audio
.. _AUDIO_SET_ID:
@@ -16,8 +17,9 @@ AUDIO_SET_ID
Synopsis
--------
-.. c:function:: int ioctl(int fd, AUDIO_SET_ID, int id)
- :name: AUDIO_SET_ID
+.. c:macro:: AUDIO_SET_ID
+
+``int ioctl(int fd, AUDIO_SET_ID, int id)``
Arguments
---------
@@ -26,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
-
- int fd
@@ -39,7 +40,6 @@ Arguments
- audio sub-stream id
-
Description
-----------
@@ -51,7 +51,6 @@ other stream types. If the stream type is set the id just specifies the
substream id of the audio stream and only the first 5 bits are
recognized.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/audio-set-mixer.rst b/Documentation/userspace-api/media/dvb/audio-set-mixer.rst
index 58f18cf8240d..45dbdf4801e0 100644
--- a/Documentation/userspace-api/media/dvb/audio-set-mixer.rst
+++ b/Documentation/userspace-api/media/dvb/audio-set-mixer.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.audio
.. _AUDIO_SET_MIXER:
@@ -16,8 +17,9 @@ AUDIO_SET_MIXER
Synopsis
--------
-.. c:function:: int ioctl(int fd, AUDIO_SET_MIXER, struct audio_mixer *mix)
- :name: AUDIO_SET_MIXER
+.. c:macro:: AUDIO_SET_MIXER
+
+``int ioctl(int fd, AUDIO_SET_MIXER, struct audio_mixer *mix)``
Arguments
---------
@@ -26,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
-
- int fd
@@ -39,13 +40,11 @@ Arguments
- mixer settings.
-
Description
-----------
This ioctl lets you adjust the mixer settings of the audio decoder.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/audio-set-mute.rst b/Documentation/userspace-api/media/dvb/audio-set-mute.rst
index 7ea7d8663e6b..987751f92967 100644
--- a/Documentation/userspace-api/media/dvb/audio-set-mute.rst
+++ b/Documentation/userspace-api/media/dvb/audio-set-mute.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.audio
.. _AUDIO_SET_MUTE:
@@ -16,9 +17,9 @@ AUDIO_SET_MUTE
Synopsis
--------
-.. c:function:: int ioctl(int fd, AUDIO_SET_MUTE, boolean state)
- :name: AUDIO_SET_MUTE
+.. c:macro:: AUDIO_SET_MUTE
+``int ioctl(int fd, AUDIO_SET_MUTE, boolean state)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
-
- int fd
@@ -44,7 +44,6 @@ Arguments
FALSE: Audio Un-mute
-
Description
-----------
@@ -55,7 +54,6 @@ V4L2 :ref:`VIDIOC_DECODER_CMD` with the
This ioctl call asks the audio device to mute the stream that is
currently being played.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/audio-set-streamtype.rst b/Documentation/userspace-api/media/dvb/audio-set-streamtype.rst
index d9f4924afdbe..77d73c74882f 100644
--- a/Documentation/userspace-api/media/dvb/audio-set-streamtype.rst
+++ b/Documentation/userspace-api/media/dvb/audio-set-streamtype.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.audio
.. _AUDIO_SET_STREAMTYPE:
@@ -16,9 +17,9 @@ AUDIO_SET_STREAMTYPE
Synopsis
--------
-.. c:function:: int ioctl(fd, AUDIO_SET_STREAMTYPE, int type)
- :name: AUDIO_SET_STREAMTYPE
+.. c:macro:: AUDIO_SET_STREAMTYPE
+``int ioctl(fd, AUDIO_SET_STREAMTYPE, int type)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
-
- int fd
@@ -40,7 +40,6 @@ Arguments
- stream type
-
Description
-----------
@@ -48,7 +47,6 @@ This ioctl tells the driver which kind of audio stream to expect. This
is useful if the stream offers several audio sub-streams like LPCM and
AC3.
-
Return Value
------------
@@ -57,12 +55,10 @@ appropriately. The generic error codes are described at the
:ref:`Generic Error Codes <gen-errors>` chapter.
-
.. flat-table::
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- ``EINVAL``
diff --git a/Documentation/userspace-api/media/dvb/audio-stop.rst b/Documentation/userspace-api/media/dvb/audio-stop.rst
index 3a2bc328626d..d77f786fd797 100644
--- a/Documentation/userspace-api/media/dvb/audio-stop.rst
+++ b/Documentation/userspace-api/media/dvb/audio-stop.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.audio
.. _AUDIO_STOP:
@@ -16,8 +17,9 @@ AUDIO_STOP
Synopsis
--------
-.. c:function:: int ioctl(int fd, AUDIO_STOP)
- :name: AUDIO_STOP
+.. c:macro:: AUDIO_STOP
+
+``int ioctl(int fd, AUDIO_STOP)``
Arguments
---------
@@ -26,21 +28,18 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
- File descriptor returned by a previous call to open().
-
Description
-----------
This ioctl call asks the Audio Device to stop playing the current
stream.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/ca-fclose.rst b/Documentation/userspace-api/media/dvb/ca-fclose.rst
index 00379ee7e9ed..27f217a350e7 100644
--- a/Documentation/userspace-api/media/dvb/ca-fclose.rst
+++ b/Documentation/userspace-api/media/dvb/ca-fclose.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.ca
.. _ca_fclose:
@@ -11,26 +12,22 @@ Name
Digital TV CA close()
-
Synopsis
--------
.. c:function:: int close(int fd)
- :name: dvb-ca-close
-
Arguments
---------
``fd``
- File descriptor returned by a previous call to :c:func:`open() <dvb-ca-open>`.
+ File descriptor returned by a previous call to :c:func:`open()`.
Description
-----------
This system call closes a previously opened CA device.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/ca-fopen.rst b/Documentation/userspace-api/media/dvb/ca-fopen.rst
index 9ca4bd1d8d5f..7f99908fff2c 100644
--- a/Documentation/userspace-api/media/dvb/ca-fopen.rst
+++ b/Documentation/userspace-api/media/dvb/ca-fopen.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.ca
.. _ca_fopen:
@@ -11,13 +12,10 @@ Name
Digital TV CA open()
-
Synopsis
--------
.. c:function:: int open(const char *name, int flags)
- :name: dvb-ca-open
-
Arguments
---------
@@ -45,7 +43,6 @@ Arguments
- open in non-blocking mode
(blocking mode is the default)
-
Description
-----------
@@ -63,11 +60,9 @@ Only one user can open the CA Device in ``O_RDWR`` mode. All other
attempts to open the device in this mode will fail, and an error code
will be returned.
-
Return Value
------------
-
On success 0 is returned.
On error -1 is returned, and the ``errno`` variable is set
diff --git a/Documentation/userspace-api/media/dvb/ca-get-cap.rst b/Documentation/userspace-api/media/dvb/ca-get-cap.rst
index 93742a5d941d..9b29513eeda8 100644
--- a/Documentation/userspace-api/media/dvb/ca-get-cap.rst
+++ b/Documentation/userspace-api/media/dvb/ca-get-cap.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.ca
.. _CA_GET_CAP:
@@ -11,19 +12,18 @@ Name
CA_GET_CAP
-
Synopsis
--------
-.. c:function:: int ioctl(fd, CA_GET_CAP, struct ca_caps *caps)
- :name: CA_GET_CAP
+.. c:macro:: CA_GET_CAP
+``int ioctl(fd, CA_GET_CAP, struct ca_caps *caps)``
Arguments
---------
``fd``
- File descriptor returned by a previous call to :c:func:`open() <dvb-ca-open>`.
+ File descriptor returned by a previous call to :c:func:`open()`.
``caps``
Pointer to struct :c:type:`ca_caps`.
diff --git a/Documentation/userspace-api/media/dvb/ca-get-descr-info.rst b/Documentation/userspace-api/media/dvb/ca-get-descr-info.rst
index be7dec053685..0cfdcdab33a8 100644
--- a/Documentation/userspace-api/media/dvb/ca-get-descr-info.rst
+++ b/Documentation/userspace-api/media/dvb/ca-get-descr-info.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.ca
.. _CA_GET_DESCR_INFO:
@@ -11,18 +12,18 @@ Name
CA_GET_DESCR_INFO
-
Synopsis
--------
-.. c:function:: int ioctl(fd, CA_GET_DESCR_INFO, struct ca_descr_info *desc)
- :name: CA_GET_DESCR_INFO
+.. c:macro:: CA_GET_DESCR_INFO
+
+``int ioctl(fd, CA_GET_DESCR_INFO, struct ca_descr_info *desc)``
Arguments
---------
``fd``
- File descriptor returned by a previous call to :c:func:`open() <dvb-ca-open>`.
+ File descriptor returned by a previous call to :c:func:`open()`.
``desc``
Pointer to struct :c:type:`ca_descr_info`.
diff --git a/Documentation/userspace-api/media/dvb/ca-get-msg.rst b/Documentation/userspace-api/media/dvb/ca-get-msg.rst
index e8802b4c5fa1..7c9a8d197343 100644
--- a/Documentation/userspace-api/media/dvb/ca-get-msg.rst
+++ b/Documentation/userspace-api/media/dvb/ca-get-msg.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.ca
.. _CA_GET_MSG:
@@ -11,19 +12,18 @@ Name
CA_GET_MSG
-
Synopsis
--------
-.. c:function:: int ioctl(fd, CA_GET_MSG, struct ca_msg *msg)
- :name: CA_GET_MSG
+.. c:macro:: CA_GET_MSG
+``int ioctl(fd, CA_GET_MSG, struct ca_msg *msg)``
Arguments
---------
``fd``
- File descriptor returned by a previous call to :c:func:`open() <dvb-ca-open>`.
+ File descriptor returned by a previous call to :c:func:`open()`.
``msg``
Pointer to struct :c:type:`ca_msg`.
@@ -38,11 +38,9 @@ Receives a message via a CI CA module.
Please notice that, on most drivers, this is done by reading from
the /dev/adapter?/ca? device node.
-
Return Value
------------
-
On success 0 is returned.
On error -1 is returned, and the ``errno`` variable is set
diff --git a/Documentation/userspace-api/media/dvb/ca-get-slot-info.rst b/Documentation/userspace-api/media/dvb/ca-get-slot-info.rst
index d283df335914..582444af7003 100644
--- a/Documentation/userspace-api/media/dvb/ca-get-slot-info.rst
+++ b/Documentation/userspace-api/media/dvb/ca-get-slot-info.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.ca
.. _CA_GET_SLOT_INFO:
@@ -11,19 +12,18 @@ Name
CA_GET_SLOT_INFO
-
Synopsis
--------
-.. c:function:: int ioctl(fd, CA_GET_SLOT_INFO, struct ca_slot_info *info)
- :name: CA_GET_SLOT_INFO
+.. c:macro:: CA_GET_SLOT_INFO
+``int ioctl(fd, CA_GET_SLOT_INFO, struct ca_slot_info *info)``
Arguments
---------
``fd``
- File descriptor returned by a previous call to :c:func:`open() <cec-open>`.
+ File descriptor returned by a previous call to :c:func:`open()`.
``info``
Pointer to struct :c:type:`ca_slot_info`.
@@ -34,7 +34,6 @@ Description
Returns information about a CA slot identified by
:c:type:`ca_slot_info`.slot_num.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/ca-reset.rst b/Documentation/userspace-api/media/dvb/ca-reset.rst
index fc49ef2ffcdb..b01ca48f0b50 100644
--- a/Documentation/userspace-api/media/dvb/ca-reset.rst
+++ b/Documentation/userspace-api/media/dvb/ca-reset.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.ca
.. _CA_RESET:
@@ -11,19 +12,18 @@ Name
CA_RESET
-
Synopsis
--------
-.. c:function:: int ioctl(fd, CA_RESET)
- :name: CA_RESET
+.. c:macro:: CA_RESET
+``int ioctl(fd, CA_RESET)``
Arguments
---------
``fd``
- File descriptor returned by a previous call to :c:func:`open() <cec-open>`.
+ File descriptor returned by a previous call to :c:func:`open()`.
Description
-----------
@@ -31,7 +31,6 @@ Description
Puts the Conditional Access hardware on its initial state. It should
be called before start using the CA hardware.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/ca-send-msg.rst b/Documentation/userspace-api/media/dvb/ca-send-msg.rst
index cf423fe881b8..7dd2ab4ef675 100644
--- a/Documentation/userspace-api/media/dvb/ca-send-msg.rst
+++ b/Documentation/userspace-api/media/dvb/ca-send-msg.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.ca
.. _CA_SEND_MSG:
@@ -11,24 +12,22 @@ Name
CA_SEND_MSG
-
Synopsis
--------
-.. c:function:: int ioctl(fd, CA_SEND_MSG, struct ca_msg *msg)
- :name: CA_SEND_MSG
+.. c:macro:: CA_SEND_MSG
+``int ioctl(fd, CA_SEND_MSG, struct ca_msg *msg)``
Arguments
---------
``fd``
- File descriptor returned by a previous call to :c:func:`open() <cec-open>`.
+ File descriptor returned by a previous call to :c:func:`open()`.
``msg``
Pointer to struct :c:type:`ca_msg`.
-
Description
-----------
diff --git a/Documentation/userspace-api/media/dvb/ca-set-descr.rst b/Documentation/userspace-api/media/dvb/ca-set-descr.rst
index a5c628a4d3cd..a740af34c872 100644
--- a/Documentation/userspace-api/media/dvb/ca-set-descr.rst
+++ b/Documentation/userspace-api/media/dvb/ca-set-descr.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.ca
.. _CA_SET_DESCR:
@@ -11,19 +12,18 @@ Name
CA_SET_DESCR
-
Synopsis
--------
-.. c:function:: int ioctl(fd, CA_SET_DESCR, struct ca_descr *desc)
- :name: CA_SET_DESCR
+.. c:macro:: CA_SET_DESCR
+``int ioctl(fd, CA_SET_DESCR, struct ca_descr *desc)``
Arguments
---------
``fd``
- File descriptor returned by a previous call to :c:func:`open() <cec-open>`.
+ File descriptor returned by a previous call to :c:func:`open()`.
``msg``
Pointer to struct :c:type:`ca_descr`.
diff --git a/Documentation/userspace-api/media/dvb/dmx-add-pid.rst b/Documentation/userspace-api/media/dvb/dmx-add-pid.rst
index 3f08ecd88b0c..ea0c7dd91e05 100644
--- a/Documentation/userspace-api/media/dvb/dmx-add-pid.rst
+++ b/Documentation/userspace-api/media/dvb/dmx-add-pid.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.dmx
.. _DMX_ADD_PID:
@@ -11,24 +12,22 @@ Name
DMX_ADD_PID
-
Synopsis
--------
-.. c:function:: int ioctl(fd, DMX_ADD_PID, __u16 *pid)
- :name: DMX_ADD_PID
+.. c:macro:: DMX_ADD_PID
+``int ioctl(fd, DMX_ADD_PID, __u16 *pid)``
Arguments
---------
``fd``
- File descriptor returned by :c:func:`open() <dvb-dmx-open>`.
+ File descriptor returned by :c:func:`open()`.
``pid``
PID number to be filtered.
-
Description
-----------
@@ -36,7 +35,6 @@ This ioctl call allows to add multiple PIDs to a transport stream filter
previously set up with :ref:`DMX_SET_PES_FILTER` and output equal to
:c:type:`DMX_OUT_TSDEMUX_TAP <dmx_output>`.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/dmx-expbuf.rst b/Documentation/userspace-api/media/dvb/dmx-expbuf.rst
index cde2b78a35fa..5cdc2035e3b7 100644
--- a/Documentation/userspace-api/media/dvb/dmx-expbuf.rst
+++ b/Documentation/userspace-api/media/dvb/dmx-expbuf.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.dmx
.. _DMX_EXPBUF:
@@ -13,24 +14,22 @@ DMX_EXPBUF - Export a buffer as a DMABUF file descriptor.
.. warning:: this API is still experimental
-
Synopsis
========
-.. c:function:: int ioctl( int fd, DMX_EXPBUF, struct dmx_exportbuffer *argp )
- :name: DMX_EXPBUF
+.. c:macro:: DMX_EXPBUF
+``int ioctl(int fd, DMX_EXPBUF, struct dmx_exportbuffer *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <dmx_fopen>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`dmx_exportbuffer`.
-
Description
===========
@@ -54,11 +53,9 @@ driver, on success. This is a DMABUF file descriptor. The application may
pass it to other DMABUF-aware devices. It is recommended to close a DMABUF
file when it is no longer used to allow the associated memory to be reclaimed.
-
Examples
========
-
.. code-block:: c
int buffer_export(int v4lfd, enum dmx_buf_type bt, int index, int *dmafd)
diff --git a/Documentation/userspace-api/media/dvb/dmx-fclose.rst b/Documentation/userspace-api/media/dvb/dmx-fclose.rst
index af036792f13d..719ac1d4f686 100644
--- a/Documentation/userspace-api/media/dvb/dmx-fclose.rst
+++ b/Documentation/userspace-api/media/dvb/dmx-fclose.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.dmx
.. _dmx_fclose:
@@ -11,27 +12,23 @@ Name
Digital TV demux close()
-
Synopsis
--------
.. c:function:: int close(int fd)
- :name: dvb-dmx-close
-
Arguments
---------
``fd``
File descriptor returned by a previous call to
- :c:func:`open() <dvb-dmx-open>`.
+ :c:func:`open()`.
Description
-----------
This system call deactivates and deallocates a filter that was
-previously allocated via the :c:func:`open() <dvb-dmx-open>` call.
-
+previously allocated via the :c:func:`open()` call.
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/dmx-fopen.rst b/Documentation/userspace-api/media/dvb/dmx-fopen.rst
index 7117c9bc4325..8f0a2b831d4a 100644
--- a/Documentation/userspace-api/media/dvb/dmx-fopen.rst
+++ b/Documentation/userspace-api/media/dvb/dmx-fopen.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.dmx
.. _dmx_fopen:
@@ -11,12 +12,10 @@ Name
Digital TV demux open()
-
Synopsis
--------
.. c:function:: int open(const char *deviceName, int flags)
- :name: dvb-dmx-open
Arguments
---------
@@ -47,7 +46,6 @@ Arguments
- open in non-blocking mode
(blocking mode is the default)
-
Description
-----------
@@ -68,7 +66,6 @@ affect the semantics of the ``open()`` call itself. A device opened
in blocking mode can later be put into non-blocking mode (and vice versa)
using the ``F_SETFL`` command of the fcntl system call.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/dmx-fread.rst b/Documentation/userspace-api/media/dvb/dmx-fread.rst
index c708a240abae..78e9daef595a 100644
--- a/Documentation/userspace-api/media/dvb/dmx-fread.rst
+++ b/Documentation/userspace-api/media/dvb/dmx-fread.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.dmx
.. _dmx_fread:
@@ -11,18 +12,16 @@ Name
Digital TV demux read()
-
Synopsis
--------
.. c:function:: size_t read(int fd, void *buf, size_t count)
- :name: dvb-dmx-read
Arguments
---------
``fd``
- File descriptor returned by a previous call to :c:func:`open() <dvb-ca-open>`.
+ File descriptor returned by a previous call to :c:func:`open()`.
``buf``
Buffer to be filled
@@ -44,7 +43,6 @@ to be transferred is implied by count.
:c:type:`DMX_CHECK_CRC <dmx_sct_filter_params>` flag set,
data that fails on CRC check will be silently ignored.
-
Return Value
------------
@@ -75,6 +73,5 @@ appropriately.
- The driver failed to write to the callers buffer due to an
invalid \*buf pointer.
-
The generic error codes are described at the
:ref:`Generic Error Codes <gen-errors>` chapter.
diff --git a/Documentation/userspace-api/media/dvb/dmx-fwrite.rst b/Documentation/userspace-api/media/dvb/dmx-fwrite.rst
index bef565a35c59..e11ee0ba84a5 100644
--- a/Documentation/userspace-api/media/dvb/dmx-fwrite.rst
+++ b/Documentation/userspace-api/media/dvb/dmx-fwrite.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.dmx
.. _dmx_fwrite:
@@ -11,18 +12,16 @@ Name
Digital TV demux write()
-
Synopsis
--------
.. c:function:: ssize_t write(int fd, const void *buf, size_t count)
- :name: dvb-dmx-write
Arguments
---------
``fd``
- File descriptor returned by a previous call to :c:func:`open() <dvb-ca-open>`.
+ File descriptor returned by a previous call to :c:func:`open()`.
``buf``
Buffer with data to be written
@@ -40,7 +39,6 @@ digitally recorded Transport Stream. Matching filters have to be defined
in the corresponding physical demux device, ``/dev/dvb/adapter?/demux?``.
The amount of data to be transferred is implied by count.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/dmx-get-pes-pids.rst b/Documentation/userspace-api/media/dvb/dmx-get-pes-pids.rst
index e92d94d93b18..4f5f0505c0d5 100644
--- a/Documentation/userspace-api/media/dvb/dmx-get-pes-pids.rst
+++ b/Documentation/userspace-api/media/dvb/dmx-get-pes-pids.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.dmx
.. _DMX_GET_PES_PIDS:
@@ -11,23 +12,22 @@ Name
DMX_GET_PES_PIDS
-
Synopsis
--------
-.. c:function:: int ioctl(fd, DMX_GET_PES_PIDS, __u16 pids[5])
- :name: DMX_GET_PES_PIDS
+.. c:macro:: DMX_GET_PES_PIDS
+
+``int ioctl(fd, DMX_GET_PES_PIDS, __u16 pids[5])``
Arguments
---------
``fd``
- File descriptor returned by :c:func:`open() <dvb-dmx-open>`.
+ File descriptor returned by :c:func:`open()`.
``pids``
Array used to store 5 Program IDs.
-
Description
-----------
@@ -45,13 +45,11 @@ pids[DMX_PES_SUBTITLE] 3 first subtitle PID
pids[DMX_PES_PCR] 4 first Program Clock Reference PID
======================= ======== =======================================
-
.. note::
A value equal to 0xffff means that the PID was not filled by the
Kernel.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/dmx-get-stc.rst b/Documentation/userspace-api/media/dvb/dmx-get-stc.rst
index 3762efcf1355..6ada74f6eb18 100644
--- a/Documentation/userspace-api/media/dvb/dmx-get-stc.rst
+++ b/Documentation/userspace-api/media/dvb/dmx-get-stc.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.dmx
.. _DMX_GET_STC:
@@ -11,23 +12,22 @@ Name
DMX_GET_STC
-
Synopsis
--------
-.. c:function:: int ioctl( int fd, DMX_GET_STC, struct dmx_stc *stc)
- :name: DMX_GET_STC
+.. c:macro:: DMX_GET_STC
+
+``int ioctl(int fd, DMX_GET_STC, struct dmx_stc *stc)``
Arguments
---------
``fd``
- File descriptor returned by :c:func:`open() <dvb-dmx-open>`.
+ File descriptor returned by :c:func:`open()`.
``stc``
Pointer to :c:type:`dmx_stc` where the stc data is to be stored.
-
Description
-----------
@@ -39,7 +39,6 @@ The result is returned in form of a ratio with a 64 bit numerator
and a 32 bit denominator, so the real 90kHz STC value is
``stc->stc / stc->base``.
-
Return Value
------------
@@ -61,6 +60,5 @@ appropriately.
- Invalid stc number.
-
The generic error codes are described at the
:ref:`Generic Error Codes <gen-errors>` chapter.
diff --git a/Documentation/userspace-api/media/dvb/dmx-mmap.rst b/Documentation/userspace-api/media/dvb/dmx-mmap.rst
index efa9b04be700..8826c6226fb0 100644
--- a/Documentation/userspace-api/media/dvb/dmx-mmap.rst
+++ b/Documentation/userspace-api/media/dvb/dmx-mmap.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.dmx
.. _dmx-mmap:
@@ -21,9 +22,7 @@ Synopsis
#include <unistd.h>
#include <sys/mman.h>
-
.. c:function:: void *mmap( void *start, size_t length, int prot, int flags, int fd, off_t offset )
- :name: dmx-mmap
Arguments
=========
@@ -54,7 +53,7 @@ Arguments
``MAP_FIXED`` requests that the driver selects no other address than
the one specified. If the specified address cannot be used,
- :ref:`mmap() <dmx-mmap>` will fail. If ``MAP_FIXED`` is specified,
+ :c:func:`mmap()` will fail. If ``MAP_FIXED`` is specified,
``start`` must be a multiple of the pagesize. Use of this option is
discouraged.
@@ -69,17 +68,16 @@ Arguments
flags.
``fd``
- File descriptor returned by :ref:`open() <dmx_fopen>`.
+ File descriptor returned by :c:func:`open()`.
``offset``
Offset of the buffer in device memory, as returned by
:ref:`DMX_QUERYBUF` ioctl.
-
Description
===========
-The :ref:`mmap() <dmx-mmap>` function asks to map ``length`` bytes starting at
+The :c:func:`mmap()` function asks to map ``length`` bytes starting at
``offset`` in the memory of the device specified by ``fd`` into the
application address space, preferably at address ``start``. This latter
address is a hint only, and is usually specified as 0.
@@ -88,13 +86,12 @@ Suitable length and offset parameters are queried with the
:ref:`DMX_QUERYBUF` ioctl. Buffers must be allocated with the
:ref:`DMX_REQBUFS` ioctl before they can be queried.
-To unmap buffers the :ref:`munmap() <dmx-munmap>` function is used.
-
+To unmap buffers the :c:func:`munmap()` function is used.
Return Value
============
-On success :ref:`mmap() <dmx-mmap>` returns a pointer to the mapped buffer. On
+On success :c:func:`mmap()` returns a pointer to the mapped buffer. On
error ``MAP_FAILED`` (-1) is returned, and the ``errno`` variable is set
appropriately. Possible error codes are:
diff --git a/Documentation/userspace-api/media/dvb/dmx-munmap.rst b/Documentation/userspace-api/media/dvb/dmx-munmap.rst
index 308a9593919c..66bbc11e5c40 100644
--- a/Documentation/userspace-api/media/dvb/dmx-munmap.rst
+++ b/Documentation/userspace-api/media/dvb/dmx-munmap.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.dmx
.. _dmx-munmap:
@@ -13,7 +14,6 @@ dmx-munmap - Unmap device memory
.. warning:: This API is still experimental.
-
Synopsis
========
@@ -22,33 +22,29 @@ Synopsis
#include <unistd.h>
#include <sys/mman.h>
-
.. c:function:: int munmap( void *start, size_t length )
- :name: dmx-munmap
Arguments
=========
``start``
Address of the mapped buffer as returned by the
- :ref:`mmap() <dmx-mmap>` function.
+ :c:func:`mmap()` function.
``length``
Length of the mapped buffer. This must be the same value as given to
- :ref:`mmap() <dmx-mmap>`.
-
+ :c:func:`mmap()`.
Description
===========
-Unmaps a previously with the :ref:`mmap() <dmx-mmap>` function mapped
+Unmaps a previously with the :c:func:`mmap()` function mapped
buffer and frees it, if possible.
-
Return Value
============
-On success :ref:`munmap() <dmx-munmap>` returns 0, on failure -1 and the
+On success :c:func:`munmap()` returns 0, on failure -1 and the
``errno`` variable is set appropriately:
EINVAL
diff --git a/Documentation/userspace-api/media/dvb/dmx-qbuf.rst b/Documentation/userspace-api/media/dvb/dmx-qbuf.rst
index fcb1c55dd49a..17e70143c1b0 100644
--- a/Documentation/userspace-api/media/dvb/dmx-qbuf.rst
+++ b/Documentation/userspace-api/media/dvb/dmx-qbuf.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.dmx
.. _DMX_QBUF:
@@ -13,27 +14,26 @@ DMX_QBUF - DMX_DQBUF - Exchange a buffer with the driver
.. warning:: this API is still experimental
-
Synopsis
========
-.. c:function:: int ioctl( int fd, DMX_QBUF, struct dmx_buffer *argp )
- :name: DMX_QBUF
+.. c:macro:: DMX_QBUF
+
+``int ioctl(int fd, DMX_QBUF, struct dmx_buffer *argp)``
-.. c:function:: int ioctl( int fd, DMX_DQBUF, struct dmx_buffer *argp )
- :name: DMX_DQBUF
+.. c:macro:: DMX_DQBUF
+``int ioctl(int fd, DMX_DQBUF, struct dmx_buffer *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <dmx_fopen>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`dmx_buffer`.
-
Description
===========
@@ -60,13 +60,12 @@ the driver fills the remaining fields or returns an error code.
By default ``DMX_DQBUF`` blocks when no buffer is in the outgoing
queue. When the ``O_NONBLOCK`` flag was given to the
-:ref:`open() <dmx_fopen>` function, ``DMX_DQBUF`` returns
+:c:func:`open()` function, ``DMX_DQBUF`` returns
immediately with an ``EAGAIN`` error code when no buffer is available.
The struct :c:type:`dmx_buffer` structure is specified in
:ref:`buffer`.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/dvb/dmx-querybuf.rst b/Documentation/userspace-api/media/dvb/dmx-querybuf.rst
index df13e2b053c9..08ee9853d6b4 100644
--- a/Documentation/userspace-api/media/dvb/dmx-querybuf.rst
+++ b/Documentation/userspace-api/media/dvb/dmx-querybuf.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.dmx
.. _DMX_QUERYBUF:
@@ -13,24 +14,22 @@ DMX_QUERYBUF - Query the status of a buffer
.. warning:: this API is still experimental
-
Synopsis
========
-.. c:function:: int ioctl( int fd, DMX_QUERYBUF, struct dvb_buffer *argp )
- :name: DMX_QUERYBUF
+.. c:macro:: DMX_QUERYBUF
+``int ioctl(int fd, DMX_QUERYBUF, struct dvb_buffer *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <dmx_fopen>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`dvb_buffer`.
-
Description
===========
diff --git a/Documentation/userspace-api/media/dvb/dmx-remove-pid.rst b/Documentation/userspace-api/media/dvb/dmx-remove-pid.rst
index ce408d0d7c77..f75b33e5e49a 100644
--- a/Documentation/userspace-api/media/dvb/dmx-remove-pid.rst
+++ b/Documentation/userspace-api/media/dvb/dmx-remove-pid.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.dmx
.. _DMX_REMOVE_PID:
@@ -11,24 +12,22 @@ Name
DMX_REMOVE_PID
-
Synopsis
--------
-.. c:function:: int ioctl(fd, DMX_REMOVE_PID, __u16 *pid)
- :name: DMX_REMOVE_PID
+.. c:macro:: DMX_REMOVE_PID
+``int ioctl(fd, DMX_REMOVE_PID, __u16 *pid)``
Arguments
---------
``fd``
- File descriptor returned by :c:func:`open() <dvb-dmx-open>`.
+ File descriptor returned by :c:func:`open()`.
``pid``
PID of the PES filter to be removed.
-
Description
-----------
@@ -37,7 +36,6 @@ transport stream filter, e. g. a filter previously set up with output
equal to :c:type:`DMX_OUT_TSDEMUX_TAP <dmx_output>`, created via either
:ref:`DMX_SET_PES_FILTER` or :ref:`DMX_ADD_PID`.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/dmx-reqbufs.rst b/Documentation/userspace-api/media/dvb/dmx-reqbufs.rst
index 433aed632f92..d2bb1909ec98 100644
--- a/Documentation/userspace-api/media/dvb/dmx-reqbufs.rst
+++ b/Documentation/userspace-api/media/dvb/dmx-reqbufs.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.dmx
.. _DMX_REQBUFS:
@@ -13,19 +14,18 @@ DMX_REQBUFS - Initiate Memory Mapping and/or DMA buffer I/O
.. warning:: this API is still experimental
-
Synopsis
========
-.. c:function:: int ioctl( int fd, DMX_REQBUFS, struct dmx_requestbuffers *argp )
- :name: DMX_REQBUFS
+.. c:macro:: DMX_REQBUFS
+``int ioctl(int fd, DMX_REQBUFS, struct dmx_requestbuffers *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <dmx_fopen>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`dmx_requestbuffers`.
@@ -64,7 +64,6 @@ buffers, however this cannot succeed when any buffers are still mapped.
A ``count`` value of zero frees all buffers, after aborting or finishing
any DMA in progress.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/dvb/dmx-set-buffer-size.rst b/Documentation/userspace-api/media/dvb/dmx-set-buffer-size.rst
index e803cbab220b..13ce4092c2d2 100644
--- a/Documentation/userspace-api/media/dvb/dmx-set-buffer-size.rst
+++ b/Documentation/userspace-api/media/dvb/dmx-set-buffer-size.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.dmx
.. _DMX_SET_BUFFER_SIZE:
@@ -11,19 +12,18 @@ Name
DMX_SET_BUFFER_SIZE
-
Synopsis
--------
-.. c:function:: int ioctl( int fd, DMX_SET_BUFFER_SIZE, unsigned long size)
- :name: DMX_SET_BUFFER_SIZE
+.. c:macro:: DMX_SET_BUFFER_SIZE
+``int ioctl(int fd, DMX_SET_BUFFER_SIZE, unsigned long size)``
Arguments
---------
``fd``
- File descriptor returned by :c:func:`open() <dvb-dmx-open>`.
+ File descriptor returned by :c:func:`open()`.
``size``
Unsigned long size
@@ -36,11 +36,9 @@ filtered data. The default size is two maximum sized sections, i.e. if
this function is not called a buffer size of ``2 * 4096`` bytes will be
used.
-
Return Value
------------
-
On success 0 is returned.
On error -1 is returned, and the ``errno`` variable is set
diff --git a/Documentation/userspace-api/media/dvb/dmx-set-filter.rst b/Documentation/userspace-api/media/dvb/dmx-set-filter.rst
index 4cd3db512b4e..f43455b7adae 100644
--- a/Documentation/userspace-api/media/dvb/dmx-set-filter.rst
+++ b/Documentation/userspace-api/media/dvb/dmx-set-filter.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.dmx
.. _DMX_SET_FILTER:
@@ -11,24 +12,23 @@ Name
DMX_SET_FILTER
-
Synopsis
--------
-.. c:function:: int ioctl( int fd, DMX_SET_FILTER, struct dmx_sct_filter_params *params)
- :name: DMX_SET_FILTER
+.. c:macro:: DMX_SET_FILTER
+
+``int ioctl(int fd, DMX_SET_FILTER, struct dmx_sct_filter_params *params)``
Arguments
---------
``fd``
- File descriptor returned by :c:func:`open() <dvb-dmx-open>`.
+ File descriptor returned by :c:func:`open()`.
``params``
Pointer to structure containing filter parameters.
-
Description
-----------
@@ -43,11 +43,9 @@ operation should be started immediately (without waiting for a
:ref:`DMX_START` ioctl call). If a filter was previously set-up, this
filter will be canceled, and the receive buffer will be flushed.
-
Return Value
------------
-
On success 0 is returned.
On error -1 is returned, and the ``errno`` variable is set
diff --git a/Documentation/userspace-api/media/dvb/dmx-set-pes-filter.rst b/Documentation/userspace-api/media/dvb/dmx-set-pes-filter.rst
index 8e54fd2636ed..5bb682e4a88f 100644
--- a/Documentation/userspace-api/media/dvb/dmx-set-pes-filter.rst
+++ b/Documentation/userspace-api/media/dvb/dmx-set-pes-filter.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.dmx
.. _DMX_SET_PES_FILTER:
@@ -11,25 +12,22 @@ Name
DMX_SET_PES_FILTER
-
Synopsis
--------
-.. c:function:: int ioctl( int fd, DMX_SET_PES_FILTER, struct dmx_pes_filter_params *params)
- :name: DMX_SET_PES_FILTER
+.. c:macro:: DMX_SET_PES_FILTER
+``int ioctl(int fd, DMX_SET_PES_FILTER, struct dmx_pes_filter_params *params)``
Arguments
---------
-
``fd``
- File descriptor returned by :c:func:`open() <dvb-dmx-open>`.
+ File descriptor returned by :c:func:`open()`.
``params``
Pointer to structure containing filter parameters.
-
Description
-----------
@@ -38,7 +36,6 @@ provided. By a PES filter is meant a filter that is based just on the
packet identifier (PID), i.e. no PES header or payload filtering
capability is supported.
-
Return Value
------------
@@ -54,7 +51,6 @@ appropriately.
:stub-columns: 0
:widths: 1 16
-
- .. row 1
- ``EBUSY``
@@ -64,6 +60,5 @@ appropriately.
Make sure that these filters are stopped before starting this
filter.
-
The generic error codes are described at the
:ref:`Generic Error Codes <gen-errors>` chapter.
diff --git a/Documentation/userspace-api/media/dvb/dmx-start.rst b/Documentation/userspace-api/media/dvb/dmx-start.rst
index 6f1413e13936..aedccf952a14 100644
--- a/Documentation/userspace-api/media/dvb/dmx-start.rst
+++ b/Documentation/userspace-api/media/dvb/dmx-start.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.dmx
.. _DMX_START:
@@ -11,19 +12,18 @@ Name
DMX_START
-
Synopsis
--------
-.. c:function:: int ioctl( int fd, DMX_START)
- :name: DMX_START
+.. c:macro:: DMX_START
+``int ioctl(int fd, DMX_START)``
Arguments
---------
``fd``
- File descriptor returned by :c:func:`open() <dvb-dmx-open>`.
+ File descriptor returned by :c:func:`open()`.
Description
-----------
@@ -31,7 +31,6 @@ Description
This ioctl call is used to start the actual filtering operation defined
via the ioctl calls :ref:`DMX_SET_FILTER` or :ref:`DMX_SET_PES_FILTER`.
-
Return Value
------------
@@ -46,7 +45,6 @@ appropriately.
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- ``EINVAL``
@@ -63,6 +61,5 @@ appropriately.
Make sure that these filters are stopped before starting this
filter.
-
The generic error codes are described at the
:ref:`Generic Error Codes <gen-errors>` chapter.
diff --git a/Documentation/userspace-api/media/dvb/dmx-stop.rst b/Documentation/userspace-api/media/dvb/dmx-stop.rst
index cbc3956fd71d..8661e6772104 100644
--- a/Documentation/userspace-api/media/dvb/dmx-stop.rst
+++ b/Documentation/userspace-api/media/dvb/dmx-stop.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.dmx
.. _DMX_STOP:
@@ -11,19 +12,18 @@ Name
DMX_STOP
-
Synopsis
--------
-.. c:function:: int ioctl( int fd, DMX_STOP)
- :name: DMX_STOP
+.. c:macro:: DMX_STOP
+``int ioctl(int fd, DMX_STOP)``
Arguments
---------
``fd``
- File descriptor returned by :c:func:`open() <dvb-dmx-open>`.
+ File descriptor returned by :c:func:`open()`.
Description
-----------
@@ -32,7 +32,6 @@ This ioctl call is used to stop the actual filtering operation defined
via the ioctl calls :ref:`DMX_SET_FILTER` or :ref:`DMX_SET_PES_FILTER` and
started via the :ref:`DMX_START` command.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/fe-diseqc-recv-slave-reply.rst b/Documentation/userspace-api/media/dvb/fe-diseqc-recv-slave-reply.rst
index 115cced97e66..d9be817f0390 100644
--- a/Documentation/userspace-api/media/dvb/fe-diseqc-recv-slave-reply.rst
+++ b/Documentation/userspace-api/media/dvb/fe-diseqc-recv-slave-reply.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.fe
.. _FE_DISEQC_RECV_SLAVE_REPLY:
@@ -11,24 +12,22 @@ Name
FE_DISEQC_RECV_SLAVE_REPLY - Receives reply from a DiSEqC 2.0 command
-
Synopsis
========
-.. c:function:: int ioctl( int fd, FE_DISEQC_RECV_SLAVE_REPLY, struct dvb_diseqc_slave_reply *argp )
- :name: FE_DISEQC_RECV_SLAVE_REPLY
+.. c:macro:: FE_DISEQC_RECV_SLAVE_REPLY
+``int ioctl(int fd, FE_DISEQC_RECV_SLAVE_REPLY, struct dvb_diseqc_slave_reply *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <frontend_f_open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
pointer to struct :c:type:`dvb_diseqc_slave_reply`.
-
Description
===========
diff --git a/Documentation/userspace-api/media/dvb/fe-diseqc-reset-overload.rst b/Documentation/userspace-api/media/dvb/fe-diseqc-reset-overload.rst
index 5ffc34a6496e..d36f7d1157c6 100644
--- a/Documentation/userspace-api/media/dvb/fe-diseqc-reset-overload.rst
+++ b/Documentation/userspace-api/media/dvb/fe-diseqc-reset-overload.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.fe
.. _FE_DISEQC_RESET_OVERLOAD:
@@ -11,19 +12,18 @@ Name
FE_DISEQC_RESET_OVERLOAD - Restores the power to the antenna subsystem, if it was powered off due - to power overload.
-
Synopsis
========
-.. c:function:: int ioctl( int fd, FE_DISEQC_RESET_OVERLOAD, NULL )
- :name: FE_DISEQC_RESET_OVERLOAD
+.. c:macro:: FE_DISEQC_RESET_OVERLOAD
+``int ioctl(int fd, FE_DISEQC_RESET_OVERLOAD, NULL)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <frontend_f_open>`.
+ File descriptor returned by :c:func:`open()`.
Description
===========
@@ -33,7 +33,6 @@ this ioctl call restores the power to the bus. The call requires
read/write access to the device. This call has no effect if the device
is manually powered off. Not all Digital TV adapters support this ioctl.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/dvb/fe-diseqc-send-burst.rst b/Documentation/userspace-api/media/dvb/fe-diseqc-send-burst.rst
index fd59afe814f3..8fb73ee29951 100644
--- a/Documentation/userspace-api/media/dvb/fe-diseqc-send-burst.rst
+++ b/Documentation/userspace-api/media/dvb/fe-diseqc-send-burst.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.fe
.. _FE_DISEQC_SEND_BURST:
@@ -11,24 +12,22 @@ Name
FE_DISEQC_SEND_BURST - Sends a 22KHz tone burst for 2x1 mini DiSEqC satellite selection.
-
Synopsis
========
-.. c:function:: int ioctl( int fd, FE_DISEQC_SEND_BURST, enum fe_sec_mini_cmd tone )
- :name: FE_DISEQC_SEND_BURST
+.. c:macro:: FE_DISEQC_SEND_BURST
+``int ioctl(int fd, FE_DISEQC_SEND_BURST, enum fe_sec_mini_cmd tone)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <frontend_f_open>`.
+ File descriptor returned by :c:func:`open()`.
``tone``
An integer enumered value described at :c:type:`fe_sec_mini_cmd`.
-
Description
===========
@@ -39,7 +38,6 @@ read/write permissions.
It provides support for what's specified at
`Digital Satellite Equipment Control (DiSEqC) - Simple "ToneBurst" Detection Circuit specification. <http://www.eutelsat.com/files/contributed/satellites/pdf/Diseqc/associated%20docs/simple_tone_burst_detec.pdf>`__
-
Return Value
============
diff --git a/Documentation/userspace-api/media/dvb/fe-diseqc-send-master-cmd.rst b/Documentation/userspace-api/media/dvb/fe-diseqc-send-master-cmd.rst
index faa2a8364e10..c97029def2ee 100644
--- a/Documentation/userspace-api/media/dvb/fe-diseqc-send-master-cmd.rst
+++ b/Documentation/userspace-api/media/dvb/fe-diseqc-send-master-cmd.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.fe
.. _FE_DISEQC_SEND_MASTER_CMD:
@@ -11,25 +12,23 @@ Name
FE_DISEQC_SEND_MASTER_CMD - Sends a DiSEqC command
-
Synopsis
========
-.. c:function:: int ioctl( int fd, FE_DISEQC_SEND_MASTER_CMD, struct dvb_diseqc_master_cmd *argp )
- :name: FE_DISEQC_SEND_MASTER_CMD
+.. c:macro:: FE_DISEQC_SEND_MASTER_CMD
+``int ioctl(int fd, FE_DISEQC_SEND_MASTER_CMD, struct dvb_diseqc_master_cmd *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <frontend_f_open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
pointer to struct
:c:type:`dvb_diseqc_master_cmd`
-
Description
===========
diff --git a/Documentation/userspace-api/media/dvb/fe-dishnetwork-send-legacy-cmd.rst b/Documentation/userspace-api/media/dvb/fe-dishnetwork-send-legacy-cmd.rst
index 60d69bb6da71..d1dba74c55a9 100644
--- a/Documentation/userspace-api/media/dvb/fe-dishnetwork-send-legacy-cmd.rst
+++ b/Documentation/userspace-api/media/dvb/fe-dishnetwork-send-legacy-cmd.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.fe
.. _FE_DISHNETWORK_SEND_LEGACY_CMD:
@@ -11,24 +12,22 @@ Name
FE_DISHNETWORK_SEND_LEGACY_CMD
-
Synopsis
========
-.. c:function:: int ioctl(int fd, FE_DISHNETWORK_SEND_LEGACY_CMD, unsigned long cmd)
- :name: FE_DISHNETWORK_SEND_LEGACY_CMD
+.. c:macro:: FE_DISHNETWORK_SEND_LEGACY_CMD
+``int ioctl(int fd, FE_DISHNETWORK_SEND_LEGACY_CMD, unsigned long cmd)``
Arguments
=========
``fd``
- File descriptor returned by :c:func:`open() <dvb-fe-open>`.
+ File descriptor returned by :c:func:`open()`.
``cmd``
Sends the specified raw cmd to the dish via DISEqC.
-
Description
===========
@@ -42,7 +41,6 @@ frontend, for Dish Network legacy switches.
As support for this ioctl were added in 2004, this means that such
dishes were already legacy in 2004.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/dvb/fe-enable-high-lnb-voltage.rst b/Documentation/userspace-api/media/dvb/fe-enable-high-lnb-voltage.rst
index df0cc91384d9..40d7320f82f7 100644
--- a/Documentation/userspace-api/media/dvb/fe-enable-high-lnb-voltage.rst
+++ b/Documentation/userspace-api/media/dvb/fe-enable-high-lnb-voltage.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.fe
.. _FE_ENABLE_HIGH_LNB_VOLTAGE:
@@ -11,19 +12,18 @@ Name
FE_ENABLE_HIGH_LNB_VOLTAGE - Select output DC level between normal LNBf voltages or higher LNBf - voltages.
-
Synopsis
========
-.. c:function:: int ioctl( int fd, FE_ENABLE_HIGH_LNB_VOLTAGE, unsigned int high )
- :name: FE_ENABLE_HIGH_LNB_VOLTAGE
+.. c:macro:: FE_ENABLE_HIGH_LNB_VOLTAGE
+``int ioctl(int fd, FE_ENABLE_HIGH_LNB_VOLTAGE, unsigned int high)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <frontend_f_open>`.
+ File descriptor returned by :c:func:`open()`.
``high``
Valid flags:
@@ -33,7 +33,6 @@ Arguments
- >0 - enables slightly higher voltages instead of 13/18V, in order
to compensate for long antenna cables.
-
Description
===========
@@ -41,7 +40,6 @@ Select output DC level between normal LNBf voltages or higher LNBf
voltages between 0 (normal) or a value grater than 0 for higher
voltages.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/dvb/fe-get-event.rst b/Documentation/userspace-api/media/dvb/fe-get-event.rst
index 723bb3a4a630..f63029eca90e 100644
--- a/Documentation/userspace-api/media/dvb/fe-get-event.rst
+++ b/Documentation/userspace-api/media/dvb/fe-get-event.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.fe
.. _FE_GET_EVENT:
@@ -13,24 +14,22 @@ FE_GET_EVENT
.. attention:: This ioctl is deprecated.
-
Synopsis
========
-.. c:function:: int ioctl(int fd, FE_GET_EVENT, struct dvb_frontend_event *ev)
- :name: FE_GET_EVENT
+.. c:macro:: FE_GET_EVENT
+``int ioctl(int fd, FE_GET_EVENT, struct dvb_frontend_event *ev)``
Arguments
=========
``fd``
- File descriptor returned by :c:func:`open() <dvb-fe-open>`.
+ File descriptor returned by :c:func:`open()`.
``ev``
Points to the location where the event, if any, is to be stored.
-
Description
===========
@@ -40,7 +39,6 @@ or non-blocking mode. In the latter case, the call fails immediately
with errno set to ``EWOULDBLOCK``. In the former case, the call blocks until
an event becomes available.
-
Return Value
============
@@ -49,12 +47,10 @@ On success 0 is returned.
On error -1 is returned, and the ``errno`` variable is set
appropriately.
-
.. flat-table::
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- ``EWOULDBLOCK``
diff --git a/Documentation/userspace-api/media/dvb/fe-get-frontend.rst b/Documentation/userspace-api/media/dvb/fe-get-frontend.rst
index 2bfc1f1a3dc5..40700533e7e7 100644
--- a/Documentation/userspace-api/media/dvb/fe-get-frontend.rst
+++ b/Documentation/userspace-api/media/dvb/fe-get-frontend.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.fe
.. _FE_GET_FRONTEND:
@@ -13,32 +14,28 @@ FE_GET_FRONTEND
.. attention:: This ioctl is deprecated.
-
Synopsis
========
-.. c:function:: int ioctl(int fd, FE_GET_FRONTEND, struct dvb_frontend_parameters *p)
- :name: FE_GET_FRONTEND
+.. c:macro:: FE_GET_FRONTEND
+``int ioctl(int fd, FE_GET_FRONTEND, struct dvb_frontend_parameters *p)``
Arguments
=========
``fd``
- File descriptor returned by :c:func:`open() <dvb-fe-open>`.
-
+ File descriptor returned by :c:func:`open()`.
``p``
Points to parameters for tuning operation.
-
Description
===========
This ioctl call queries the currently effective frontend parameters. For
this command, read-only access to the device is sufficient.
-
Return Value
============
@@ -51,7 +48,6 @@ appropriately.
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- ``EINVAL``
diff --git a/Documentation/userspace-api/media/dvb/fe-get-info.rst b/Documentation/userspace-api/media/dvb/fe-get-info.rst
index eba115c03471..2e5f0209846f 100644
--- a/Documentation/userspace-api/media/dvb/fe-get-info.rst
+++ b/Documentation/userspace-api/media/dvb/fe-get-info.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.fe
.. _FE_GET_INFO:
@@ -12,24 +13,22 @@ Name
FE_GET_INFO - Query Digital TV frontend capabilities and returns information
about the - front-end. This call only requires read-only access to the device.
-
Synopsis
========
-.. c:function:: int ioctl( int fd, FE_GET_INFO, struct dvb_frontend_info *argp )
- :name: FE_GET_INFO
+.. c:macro:: FE_GET_INFO
+``int ioctl(int fd, FE_GET_INFO, struct dvb_frontend_info *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <frontend_f_open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
pointer to struct :c:type:`dvb_frontend_info`
-
Description
===========
@@ -40,7 +39,6 @@ takes a pointer to dvb_frontend_info which is filled by the driver.
When the driver is not compatible with this specification the ioctl
returns an error.
-
frontend capabilities
=====================
@@ -49,7 +47,6 @@ supported only on some specific frontend types.
The frontend capabilities are described at :c:type:`fe_caps`.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/dvb/fe-get-property.rst b/Documentation/userspace-api/media/dvb/fe-get-property.rst
index 10e1db172d8a..29363dc4a0c3 100644
--- a/Documentation/userspace-api/media/dvb/fe-get-property.rst
+++ b/Documentation/userspace-api/media/dvb/fe-get-property.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.fe
.. _FE_GET_PROPERTY:
@@ -11,27 +12,26 @@ Name
FE_SET_PROPERTY - FE_GET_PROPERTY - FE_SET_PROPERTY sets one or more frontend properties. - FE_GET_PROPERTY returns one or more frontend properties.
-
Synopsis
========
-.. c:function:: int ioctl( int fd, FE_GET_PROPERTY, struct dtv_properties *argp )
- :name: FE_GET_PROPERTY
+.. c:macro:: FE_GET_PROPERTY
+
+``int ioctl(int fd, FE_GET_PROPERTY, struct dtv_properties *argp)``
-.. c:function:: int ioctl( int fd, FE_SET_PROPERTY, struct dtv_properties *argp )
- :name: FE_SET_PROPERTY
+.. c:macro:: FE_SET_PROPERTY
+``int ioctl(int fd, FE_SET_PROPERTY, struct dtv_properties *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <frontend_f_open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`dtv_properties`.
-
Description
===========
@@ -63,7 +63,6 @@ depends on the delivery system and on the device:
- This call only requires read-only access to the device.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/dvb/fe-read-ber.rst b/Documentation/userspace-api/media/dvb/fe-read-ber.rst
index 2200eb1d06f9..f33f1dd20501 100644
--- a/Documentation/userspace-api/media/dvb/fe-read-ber.rst
+++ b/Documentation/userspace-api/media/dvb/fe-read-ber.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.fe
.. _FE_READ_BER:
@@ -16,20 +17,19 @@ FE_READ_BER
Synopsis
========
-.. c:function:: int ioctl(int fd, FE_READ_BER, uint32_t *ber)
- :name: FE_READ_BER
+.. c:macro:: FE_READ_BER
+``int ioctl(int fd, FE_READ_BER, uint32_t *ber)``
Arguments
=========
``fd``
- File descriptor returned by :c:func:`open() <dvb-fe-open>`.
+ File descriptor returned by :c:func:`open()`.
``ber``
The bit error rate is stored into \*ber.
-
Description
===========
@@ -37,7 +37,6 @@ This ioctl call returns the bit error rate for the signal currently
received/demodulated by the front-end. For this command, read-only
access to the device is sufficient.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/dvb/fe-read-signal-strength.rst b/Documentation/userspace-api/media/dvb/fe-read-signal-strength.rst
index 4832efad2a2e..2b7d06145cb1 100644
--- a/Documentation/userspace-api/media/dvb/fe-read-signal-strength.rst
+++ b/Documentation/userspace-api/media/dvb/fe-read-signal-strength.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.fe
.. _FE_READ_SIGNAL_STRENGTH:
@@ -16,20 +17,19 @@ FE_READ_SIGNAL_STRENGTH
Synopsis
========
-.. c:function:: int ioctl( int fd, FE_READ_SIGNAL_STRENGTH, uint16_t *strength)
- :name: FE_READ_SIGNAL_STRENGTH
+.. c:macro:: FE_READ_SIGNAL_STRENGTH
+``int ioctl(int fd, FE_READ_SIGNAL_STRENGTH, uint16_t *strength)``
Arguments
=========
``fd``
- File descriptor returned by :c:func:`open() <dvb-fe-open>`.
+ File descriptor returned by :c:func:`open()`.
``strength``
The signal strength value is stored into \*strength.
-
Description
===========
@@ -37,7 +37,6 @@ This ioctl call returns the signal strength value for the signal
currently received by the front-end. For this command, read-only access
to the device is sufficient.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/dvb/fe-read-snr.rst b/Documentation/userspace-api/media/dvb/fe-read-snr.rst
index 141e4fc661c2..e44e559ab7e8 100644
--- a/Documentation/userspace-api/media/dvb/fe-read-snr.rst
+++ b/Documentation/userspace-api/media/dvb/fe-read-snr.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.fe
.. _FE_READ_SNR:
@@ -16,20 +17,19 @@ FE_READ_SNR
Synopsis
========
-.. c:function:: int ioctl(int fd, FE_READ_SNR, int16_t *snr)
- :name: FE_READ_SNR
+.. c:macro:: FE_READ_SNR
+``int ioctl(int fd, FE_READ_SNR, int16_t *snr)``
Arguments
=========
``fd``
- File descriptor returned by :c:func:`open() <dvb-fe-open>`.
+ File descriptor returned by :c:func:`open()`.
``snr``
The signal-to-noise ratio is stored into \*snr.
-
Description
===========
@@ -37,7 +37,6 @@ This ioctl call returns the signal-to-noise ratio for the signal
currently received by the front-end. For this command, read-only access
to the device is sufficient.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/dvb/fe-read-status.rst b/Documentation/userspace-api/media/dvb/fe-read-status.rst
index ba61feb5e535..75c6ee60ac9c 100644
--- a/Documentation/userspace-api/media/dvb/fe-read-status.rst
+++ b/Documentation/userspace-api/media/dvb/fe-read-status.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.fe
.. _FE_READ_STATUS:
@@ -11,25 +12,23 @@ Name
FE_READ_STATUS - Returns status information about the front-end. This call only requires - read-only access to the device
-
Synopsis
========
-.. c:function:: int ioctl( int fd, FE_READ_STATUS, unsigned int *status )
- :name: FE_READ_STATUS
+.. c:macro:: FE_READ_STATUS
+``int ioctl(int fd, FE_READ_STATUS, unsigned int *status)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <frontend_f_open>`.
+ File descriptor returned by :c:func:`open()`.
``status``
pointer to a bitmask integer filled with the values defined by enum
:c:type:`fe_status`.
-
Description
===========
@@ -44,7 +43,6 @@ written.
varies according with the architecture. This needs to be fixed in the
future.
-
int fe_status
=============
@@ -52,7 +50,6 @@ The fe_status parameter is used to indicate the current state and/or
state changes of the frontend hardware. It is produced using the enum
:c:type:`fe_status` values on a bitmask
-
Return Value
============
diff --git a/Documentation/userspace-api/media/dvb/fe-read-uncorrected-blocks.rst b/Documentation/userspace-api/media/dvb/fe-read-uncorrected-blocks.rst
index bf9746f8a671..653cd99a66f5 100644
--- a/Documentation/userspace-api/media/dvb/fe-read-uncorrected-blocks.rst
+++ b/Documentation/userspace-api/media/dvb/fe-read-uncorrected-blocks.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.fe
.. _FE_READ_UNCORRECTED_BLOCKS:
@@ -16,20 +17,19 @@ FE_READ_UNCORRECTED_BLOCKS
Synopsis
========
-.. c:function:: int ioctl( int fd, FE_READ_UNCORRECTED_BLOCKS, uint32_t *ublocks)
- :name: FE_READ_UNCORRECTED_BLOCKS
+.. c:macro:: FE_READ_UNCORRECTED_BLOCKS
+``int ioctl(int fd, FE_READ_UNCORRECTED_BLOCKS, uint32_t *ublocks)``
Arguments
=========
``fd``
- File descriptor returned by :c:func:`open() <dvb-fe-open>`.
+ File descriptor returned by :c:func:`open()`.
``ublocks``
The total number of uncorrected blocks seen by the driver so far.
-
Description
===========
@@ -39,7 +39,6 @@ increment in block count during a specific time interval should be
calculated. For this command, read-only access to the device is
sufficient.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/dvb/fe-set-frontend-tune-mode.rst b/Documentation/userspace-api/media/dvb/fe-set-frontend-tune-mode.rst
index f0e178e3a180..56923c1a66b0 100644
--- a/Documentation/userspace-api/media/dvb/fe-set-frontend-tune-mode.rst
+++ b/Documentation/userspace-api/media/dvb/fe-set-frontend-tune-mode.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.fe
.. _FE_SET_FRONTEND_TUNE_MODE:
@@ -11,19 +12,18 @@ Name
FE_SET_FRONTEND_TUNE_MODE - Allow setting tuner mode flags to the frontend.
-
Synopsis
========
-.. c:function:: int ioctl( int fd, FE_SET_FRONTEND_TUNE_MODE, unsigned int flags )
- :name: FE_SET_FRONTEND_TUNE_MODE
+.. c:macro:: FE_SET_FRONTEND_TUNE_MODE
+``int ioctl(int fd, FE_SET_FRONTEND_TUNE_MODE, unsigned int flags)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <frontend_f_open>`.
+ File descriptor returned by :c:func:`open()`.
``flags``
Valid flags:
@@ -37,14 +37,12 @@ Arguments
is closed, this flag will be automatically turned off when the
device is reopened read-write.
-
Description
===========
Allow setting tuner mode flags to the frontend, between 0 (normal) or
``FE_TUNE_MODE_ONESHOT`` mode
-
Return Value
============
diff --git a/Documentation/userspace-api/media/dvb/fe-set-frontend.rst b/Documentation/userspace-api/media/dvb/fe-set-frontend.rst
index 2b169778e0d6..d1b857632059 100644
--- a/Documentation/userspace-api/media/dvb/fe-set-frontend.rst
+++ b/Documentation/userspace-api/media/dvb/fe-set-frontend.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.fe
.. _FE_SET_FRONTEND:
@@ -13,24 +14,22 @@ Name
FE_SET_FRONTEND
-
Synopsis
========
-.. c:function:: int ioctl(int fd, FE_SET_FRONTEND, struct dvb_frontend_parameters *p)
- :name: FE_SET_FRONTEND
+.. c:macro:: FE_SET_FRONTEND
+``int ioctl(int fd, FE_SET_FRONTEND, struct dvb_frontend_parameters *p)``
Arguments
=========
``fd``
- File descriptor returned by :c:func:`open() <dvb-fe-open>`.
+ File descriptor returned by :c:func:`open()`.
``p``
Points to parameters for tuning operation.
-
Description
===========
@@ -44,7 +43,6 @@ operation is initiated before the previous one was completed, the
previous operation will be aborted in favor of the new one. This command
requires read/write access to the device.
-
Return Value
============
@@ -66,6 +64,5 @@ appropriately.
- Maximum supported symbol rate reached.
-
Generic error codes are described at the
:ref:`Generic Error Codes <gen-errors>` chapter.
diff --git a/Documentation/userspace-api/media/dvb/fe-set-tone.rst b/Documentation/userspace-api/media/dvb/fe-set-tone.rst
index 944d54488e71..9f44bf946183 100644
--- a/Documentation/userspace-api/media/dvb/fe-set-tone.rst
+++ b/Documentation/userspace-api/media/dvb/fe-set-tone.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.fe
.. _FE_SET_TONE:
@@ -11,24 +12,22 @@ Name
FE_SET_TONE - Sets/resets the generation of the continuous 22kHz tone.
-
Synopsis
========
-.. c:function:: int ioctl( int fd, FE_SET_TONE, enum fe_sec_tone_mode tone )
- :name: FE_SET_TONE
+.. c:macro:: FE_SET_TONE
+``int ioctl(int fd, FE_SET_TONE, enum fe_sec_tone_mode tone)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <frontend_f_open>`.
+ File descriptor returned by :c:func:`open()`.
``tone``
an integer enumered value described at :c:type:`fe_sec_tone_mode`
-
Description
===========
@@ -45,7 +44,6 @@ this is done using the DiSEqC ioctls.
capability of selecting the band. So, it is recommended that applications
would change to SEC_TONE_OFF when the device is not used.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/dvb/fe-set-voltage.rst b/Documentation/userspace-api/media/dvb/fe-set-voltage.rst
index 73740be0e7dc..c66771830be1 100644
--- a/Documentation/userspace-api/media/dvb/fe-set-voltage.rst
+++ b/Documentation/userspace-api/media/dvb/fe-set-voltage.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.fe
.. _FE_SET_VOLTAGE:
@@ -11,24 +12,22 @@ Name
FE_SET_VOLTAGE - Allow setting the DC level sent to the antenna subsystem.
-
Synopsis
========
-.. c:function:: int ioctl( int fd, FE_SET_VOLTAGE, enum fe_sec_voltage voltage )
- :name: FE_SET_VOLTAGE
+.. c:macro:: FE_SET_VOLTAGE
+``int ioctl(int fd, FE_SET_VOLTAGE, enum fe_sec_voltage voltage)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <frontend_f_open>`.
+ File descriptor returned by :c:func:`open()`.
``voltage``
an integer enumered value described at :c:type:`fe_sec_voltage`
-
Description
===========
@@ -49,7 +48,6 @@ power up the LNBf.
the voltage to SEC_VOLTAGE_OFF while the device is not is used is
recommended.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/dvb/frontend_f_close.rst b/Documentation/userspace-api/media/dvb/frontend_f_close.rst
index 96e15b4ee76d..52c323a85014 100644
--- a/Documentation/userspace-api/media/dvb/frontend_f_close.rst
+++ b/Documentation/userspace-api/media/dvb/frontend_f_close.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.fe
.. _frontend_f_close:
@@ -11,7 +12,6 @@ Name
fe-close - Close a frontend device
-
Synopsis
========
@@ -19,16 +19,13 @@ Synopsis
#include <unistd.h>
-
.. c:function:: int close( int fd )
- :name: dvb-fe-close
Arguments
=========
``fd``
- File descriptor returned by :c:func:`open() <dvb-fe-open>`.
-
+ File descriptor returned by :c:func:`open()`.
Description
===========
@@ -37,7 +34,6 @@ This system call closes a previously opened front-end device. After
closing a front-end device, its corresponding hardware might be powered
down automatically.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/dvb/frontend_f_open.rst b/Documentation/userspace-api/media/dvb/frontend_f_open.rst
index 49a01dd58d03..bb37eded0870 100644
--- a/Documentation/userspace-api/media/dvb/frontend_f_open.rst
+++ b/Documentation/userspace-api/media/dvb/frontend_f_open.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.fe
.. _frontend_f_open:
@@ -11,7 +12,6 @@ Name
fe-open - Open a frontend device
-
Synopsis
========
@@ -19,9 +19,7 @@ Synopsis
#include <fcntl.h>
-
.. c:function:: int open( const char *device_name, int flags )
- :name: dvb-fe-open
Arguments
=========
@@ -44,7 +42,6 @@ Arguments
Other flags have no effect.
-
Description
===========
@@ -70,16 +67,14 @@ the specified mode. This implies that the corresponding hardware is
powered up, and that other front-ends may have been powered down to make
that possible.
-
Return Value
============
-On success :ref:`open() <frontend_f_open>` returns the new file descriptor.
+On success :c:func:`open()` returns the new file descriptor.
On error, -1 is returned, and the ``errno`` variable is set appropriately.
Possible error codes are:
-
On success 0 is returned, and :c:type:`ca_slot_info` is filled.
On error -1 is returned, and the ``errno`` variable is set
@@ -105,6 +100,5 @@ appropriately.
- The limit on the total number of files open on the system has been
reached.
-
The generic error codes are described at the
:ref:`Generic Error Codes <gen-errors>` chapter.
diff --git a/Documentation/userspace-api/media/dvb/net-add-if.rst b/Documentation/userspace-api/media/dvb/net-add-if.rst
index 0859830b645e..022b4c626249 100644
--- a/Documentation/userspace-api/media/dvb/net-add-if.rst
+++ b/Documentation/userspace-api/media/dvb/net-add-if.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.net
.. _NET_ADD_IF:
@@ -11,24 +12,22 @@ Name
NET_ADD_IF - Creates a new network interface for a given Packet ID.
-
Synopsis
========
-.. c:function:: int ioctl( int fd, NET_ADD_IF, struct dvb_net_if *net_if )
- :name: NET_ADD_IF
+.. c:macro:: NET_ADD_IF
+``int ioctl(int fd, NET_ADD_IF, struct dvb_net_if *net_if)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <frontend_f_open>`.
+ File descriptor returned by :c:func:`open()`.
``net_if``
pointer to struct :c:type:`dvb_net_if`
-
Description
===========
diff --git a/Documentation/userspace-api/media/dvb/net-get-if.rst b/Documentation/userspace-api/media/dvb/net-get-if.rst
index d8c9f939d62c..e99696c9db74 100644
--- a/Documentation/userspace-api/media/dvb/net-get-if.rst
+++ b/Documentation/userspace-api/media/dvb/net-get-if.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.net
.. _NET_GET_IF:
@@ -11,24 +12,22 @@ Name
NET_GET_IF - Read the configuration data of an interface created via - :ref:`NET_ADD_IF <net>`.
-
Synopsis
========
-.. c:function:: int ioctl( int fd, NET_GET_IF, struct dvb_net_if *net_if )
- :name: NET_GET_IF
+.. c:macro:: NET_GET_IF
+``int ioctl(int fd, NET_GET_IF, struct dvb_net_if *net_if)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <frontend_f_open>`.
+ File descriptor returned by :c:func:`open()`.
``net_if``
pointer to struct :c:type:`dvb_net_if`
-
Description
===========
@@ -39,7 +38,6 @@ encapsulation type used on such interface. If the interface was not
created yet with :ref:`NET_ADD_IF <net>`, it will return -1 and fill
the ``errno`` with ``EINVAL`` error code.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/dvb/net-remove-if.rst b/Documentation/userspace-api/media/dvb/net-remove-if.rst
index ecbcacbaf9f7..ac88691c0423 100644
--- a/Documentation/userspace-api/media/dvb/net-remove-if.rst
+++ b/Documentation/userspace-api/media/dvb/net-remove-if.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.net
.. _NET_REMOVE_IF:
@@ -11,31 +12,28 @@ Name
NET_REMOVE_IF - Removes a network interface.
-
Synopsis
========
-.. c:function:: int ioctl( int fd, NET_REMOVE_IF, int ifnum )
- :name: NET_REMOVE_IF
+.. c:macro:: NET_REMOVE_IF
+``int ioctl(int fd, NET_REMOVE_IF, int ifnum)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <frontend_f_open>`.
+ File descriptor returned by :c:func:`open()`.
``net_if``
number of the interface to be removed
-
Description
===========
The NET_REMOVE_IF ioctl deletes an interface previously created via
:ref:`NET_ADD_IF <net>`.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/dvb/video-clear-buffer.rst b/Documentation/userspace-api/media/dvb/video-clear-buffer.rst
index fa1f2f49bdac..a7730559bbb2 100644
--- a/Documentation/userspace-api/media/dvb/video-clear-buffer.rst
+++ b/Documentation/userspace-api/media/dvb/video-clear-buffer.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.video
.. _VIDEO_CLEAR_BUFFER:
@@ -16,9 +17,9 @@ VIDEO_CLEAR_BUFFER
Synopsis
--------
-.. c:function:: int ioctl(fd, VIDEO_CLEAR_BUFFER)
- :name: VIDEO_CLEAR_BUFFER
+.. c:macro:: VIDEO_CLEAR_BUFFER
+``int ioctl(fd, VIDEO_CLEAR_BUFFER)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -40,14 +40,12 @@ Arguments
- Equals VIDEO_CLEAR_BUFFER for this command.
-
Description
-----------
This ioctl call clears all video buffers in the driver and in the
decoder hardware.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/video-command.rst b/Documentation/userspace-api/media/dvb/video-command.rst
index ef0da85d5f92..cae9445eb3af 100644
--- a/Documentation/userspace-api/media/dvb/video-command.rst
+++ b/Documentation/userspace-api/media/dvb/video-command.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.video
.. _VIDEO_COMMAND:
@@ -16,9 +17,9 @@ VIDEO_COMMAND
Synopsis
--------
-.. c:function:: int ioctl(int fd, VIDEO_COMMAND, struct video_command *cmd)
- :name: VIDEO_COMMAND
+.. c:macro:: VIDEO_COMMAND
+``int ioctl(int fd, VIDEO_COMMAND, struct video_command *cmd)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -46,7 +46,6 @@ Arguments
- Commands the decoder.
-
Description
-----------
@@ -59,7 +58,7 @@ subset of the ``v4l2_decoder_cmd`` struct, so refer to the
:ref:`VIDIOC_DECODER_CMD` documentation for
more information.
-.. c:type:: struct video_command
+.. c:type:: video_command
.. code-block:: c
@@ -89,7 +88,6 @@ more information.
};
};
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/video-continue.rst b/Documentation/userspace-api/media/dvb/video-continue.rst
index 9a767b50b23b..bc34bf3989e4 100644
--- a/Documentation/userspace-api/media/dvb/video-continue.rst
+++ b/Documentation/userspace-api/media/dvb/video-continue.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.video
.. _VIDEO_CONTINUE:
@@ -16,9 +17,9 @@ VIDEO_CONTINUE
Synopsis
--------
-.. c:function:: int ioctl(fd, VIDEO_CONTINUE)
- :name: VIDEO_CONTINUE
+.. c:macro:: VIDEO_CONTINUE
+``int ioctl(fd, VIDEO_CONTINUE)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -40,7 +40,6 @@ Arguments
- Equals VIDEO_CONTINUE for this command.
-
Description
-----------
@@ -50,7 +49,6 @@ V4L2 :ref:`VIDIOC_DECODER_CMD` instead.
This ioctl call restarts decoding and playing processes of the video
stream which was played before a call to VIDEO_FREEZE was made.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/video-fast-forward.rst b/Documentation/userspace-api/media/dvb/video-fast-forward.rst
index c43a13c7ae75..e71fa8d6965b 100644
--- a/Documentation/userspace-api/media/dvb/video-fast-forward.rst
+++ b/Documentation/userspace-api/media/dvb/video-fast-forward.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.video
.. _VIDEO_FAST_FORWARD:
@@ -16,9 +17,9 @@ VIDEO_FAST_FORWARD
Synopsis
--------
-.. c:function:: int ioctl(fd, VIDEO_FAST_FORWARD, int nFrames)
- :name: VIDEO_FAST_FORWARD
+.. c:macro:: VIDEO_FAST_FORWARD
+``int ioctl(fd, VIDEO_FAST_FORWARD, int nFrames)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -46,7 +46,6 @@ Arguments
- The number of frames to skip.
-
Description
-----------
@@ -54,7 +53,6 @@ This ioctl call asks the Video Device to skip decoding of N number of
I-frames. This call can only be used if VIDEO_SOURCE_MEMORY is
selected.
-
Return Value
------------
@@ -63,12 +61,10 @@ appropriately. The generic error codes are described at the
:ref:`Generic Error Codes <gen-errors>` chapter.
-
.. flat-table::
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- ``EPERM``
diff --git a/Documentation/userspace-api/media/dvb/video-fclose.rst b/Documentation/userspace-api/media/dvb/video-fclose.rst
index 27ccb2d6f961..01d24d548439 100644
--- a/Documentation/userspace-api/media/dvb/video-fclose.rst
+++ b/Documentation/userspace-api/media/dvb/video-fclose.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.video
.. _video_fclose:
@@ -18,7 +19,6 @@ Synopsis
.. c:function:: int close(int fd)
-
Arguments
---------
@@ -26,20 +26,17 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
- File descriptor returned by a previous call to open().
-
Description
-----------
This system call closes a previously opened video device.
-
Return Value
------------
@@ -47,7 +44,6 @@ Return Value
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- ``EBADF``
diff --git a/Documentation/userspace-api/media/dvb/video-fopen.rst b/Documentation/userspace-api/media/dvb/video-fopen.rst
index aa1dc6020fa7..1371b083e4e8 100644
--- a/Documentation/userspace-api/media/dvb/video-fopen.rst
+++ b/Documentation/userspace-api/media/dvb/video-fopen.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.video
.. _video_fopen:
@@ -18,7 +19,6 @@ Synopsis
.. c:function:: int open(const char *deviceName, int flags)
-
Arguments
---------
@@ -26,7 +26,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- const char \*deviceName
@@ -59,7 +58,6 @@ Arguments
-
- (blocking mode is the default)
-
Description
-----------
@@ -79,7 +77,6 @@ returned. If the Video Device is opened in O_RDONLY mode, the only
ioctl call that can be used is VIDEO_GET_STATUS. All other call will
return an error code.
-
Return Value
------------
@@ -89,7 +86,6 @@ Return Value
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- ``ENODEV``
diff --git a/Documentation/userspace-api/media/dvb/video-freeze.rst b/Documentation/userspace-api/media/dvb/video-freeze.rst
index 93e0ae8e79d8..4321f257cb70 100644
--- a/Documentation/userspace-api/media/dvb/video-freeze.rst
+++ b/Documentation/userspace-api/media/dvb/video-freeze.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.video
.. _VIDEO_FREEZE:
@@ -16,9 +17,9 @@ VIDEO_FREEZE
Synopsis
--------
-.. c:function:: int ioctl(fd, VIDEO_FREEZE)
- :name: VIDEO_FREEZE
+.. c:macro:: VIDEO_FREEZE
+``int ioctl(fd, VIDEO_FREEZE)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -40,7 +40,6 @@ Arguments
- Equals VIDEO_FREEZE for this command.
-
Description
-----------
@@ -54,7 +53,6 @@ If VIDEO_SOURCE_MEMORY is selected in the ioctl call
VIDEO_SELECT_SOURCE, the Digital TV subsystem will not decode any more data
until the ioctl call VIDEO_CONTINUE or VIDEO_PLAY is performed.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/video-fwrite.rst b/Documentation/userspace-api/media/dvb/video-fwrite.rst
index 5ccdf78f11e1..a07fd7d7a40e 100644
--- a/Documentation/userspace-api/media/dvb/video-fwrite.rst
+++ b/Documentation/userspace-api/media/dvb/video-fwrite.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.video
.. _video_fwrite:
@@ -18,7 +19,6 @@ Synopsis
.. c:function:: size_t write(int fd, const void *buf, size_t count)
-
Arguments
---------
@@ -26,7 +26,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -45,7 +44,6 @@ Arguments
- Size of buf.
-
Description
-----------
@@ -55,7 +53,6 @@ PES format, unless the capability allows other formats. If O_NONBLOCK
is not specified the function will block until buffer space is
available. The amount of data to be transferred is implied by count.
-
Return Value
------------
@@ -63,7 +60,6 @@ Return Value
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- ``EPERM``
diff --git a/Documentation/userspace-api/media/dvb/video-get-capabilities.rst b/Documentation/userspace-api/media/dvb/video-get-capabilities.rst
index 619f78a66b6c..01e09f56656c 100644
--- a/Documentation/userspace-api/media/dvb/video-get-capabilities.rst
+++ b/Documentation/userspace-api/media/dvb/video-get-capabilities.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.video
.. _VIDEO_GET_CAPABILITIES:
@@ -16,9 +17,9 @@ VIDEO_GET_CAPABILITIES
Synopsis
--------
-.. c:function:: int ioctl(fd, VIDEO_GET_CAPABILITIES, unsigned int *cap)
- :name: VIDEO_GET_CAPABILITIES
+.. c:macro:: VIDEO_GET_CAPABILITIES
+``int ioctl(fd, VIDEO_GET_CAPABILITIES, unsigned int *cap)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -46,7 +46,6 @@ Arguments
- Pointer to a location where to store the capability information.
-
Description
-----------
@@ -54,7 +53,6 @@ This ioctl call asks the video device about its decoding capabilities.
On success it returns and integer which has bits set according to the
defines in section ??.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/video-get-event.rst b/Documentation/userspace-api/media/dvb/video-get-event.rst
index 29566a245fcd..90382bc36cfe 100644
--- a/Documentation/userspace-api/media/dvb/video-get-event.rst
+++ b/Documentation/userspace-api/media/dvb/video-get-event.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.video
.. _VIDEO_GET_EVENT:
@@ -16,9 +17,9 @@ VIDEO_GET_EVENT
Synopsis
--------
-.. c:function:: int ioctl(fd, VIDEO_GET_EVENT, struct video_event *ev)
- :name: VIDEO_GET_EVENT
+.. c:macro:: VIDEO_GET_EVENT
+``int ioctl(fd, VIDEO_GET_EVENT, struct video_event *ev)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -46,7 +46,6 @@ Arguments
- Points to the location where the event, if any, is to be stored.
-
Description
-----------
@@ -93,7 +92,6 @@ appropriately. The generic error codes are described at the
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- ``EWOULDBLOCK``
diff --git a/Documentation/userspace-api/media/dvb/video-get-frame-count.rst b/Documentation/userspace-api/media/dvb/video-get-frame-count.rst
index 5f65f8dba184..b48ac8c58a41 100644
--- a/Documentation/userspace-api/media/dvb/video-get-frame-count.rst
+++ b/Documentation/userspace-api/media/dvb/video-get-frame-count.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.video
.. _VIDEO_GET_FRAME_COUNT:
@@ -16,9 +17,9 @@ VIDEO_GET_FRAME_COUNT
Synopsis
--------
-.. c:function:: int ioctl(int fd, VIDEO_GET_FRAME_COUNT, __u64 *pts)
- :name: VIDEO_GET_FRAME_COUNT
+.. c:macro:: VIDEO_GET_FRAME_COUNT
+``int ioctl(int fd, VIDEO_GET_FRAME_COUNT, __u64 *pts)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -47,7 +47,6 @@ Arguments
- Returns the number of frames displayed since the decoder was
started.
-
Description
-----------
@@ -58,7 +57,6 @@ control.
This ioctl call asks the Video Device to return the number of displayed
frames since the decoder was started.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/video-get-pts.rst b/Documentation/userspace-api/media/dvb/video-get-pts.rst
index 28655a1a9249..fedaff41be0b 100644
--- a/Documentation/userspace-api/media/dvb/video-get-pts.rst
+++ b/Documentation/userspace-api/media/dvb/video-get-pts.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.video
.. _VIDEO_GET_PTS:
@@ -16,9 +17,9 @@ VIDEO_GET_PTS
Synopsis
--------
-.. c:function:: int ioctl(int fd, VIDEO_GET_PTS, __u64 *pts)
- :name: VIDEO_GET_PTS
+.. c:macro:: VIDEO_GET_PTS
+``int ioctl(int fd, VIDEO_GET_PTS, __u64 *pts)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -51,7 +51,6 @@ Arguments
but may also be a value close to it like the PTS of the last
decoded frame or the last PTS extracted by the PES parser.
-
Description
-----------
@@ -62,7 +61,6 @@ control.
This ioctl call asks the Video Device to return the current PTS
timestamp.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/video-get-size.rst b/Documentation/userspace-api/media/dvb/video-get-size.rst
index a199afbfc1b1..de34331c5bd1 100644
--- a/Documentation/userspace-api/media/dvb/video-get-size.rst
+++ b/Documentation/userspace-api/media/dvb/video-get-size.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.video
.. _VIDEO_GET_SIZE:
@@ -16,9 +17,9 @@ VIDEO_GET_SIZE
Synopsis
--------
-.. c:function:: int ioctl(int fd, VIDEO_GET_SIZE, video_size_t *size)
- :name: VIDEO_GET_SIZE
+.. c:macro:: VIDEO_GET_SIZE
+``int ioctl(int fd, VIDEO_GET_SIZE, video_size_t *size)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -46,7 +46,6 @@ Arguments
- Returns the size and aspect ratio.
-
Description
-----------
@@ -62,7 +61,6 @@ This ioctl returns the size and aspect ratio.
video_format_t aspect_ratio;
} video_size_t;
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/video-get-status.rst b/Documentation/userspace-api/media/dvb/video-get-status.rst
index 3f29dac18a21..9b86fbf411d4 100644
--- a/Documentation/userspace-api/media/dvb/video-get-status.rst
+++ b/Documentation/userspace-api/media/dvb/video-get-status.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.video
.. _VIDEO_GET_STATUS:
@@ -16,9 +17,9 @@ VIDEO_GET_STATUS
Synopsis
--------
-.. c:function:: int ioctl(fd, VIDEO_GET_STATUS, struct video_status *status)
- :name: VIDEO_GET_STATUS
+.. c:macro:: VIDEO_GET_STATUS
+``int ioctl(fd, VIDEO_GET_STATUS, struct video_status *status)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -46,7 +46,6 @@ Arguments
- Returns the current status of the Video Device.
-
Description
-----------
diff --git a/Documentation/userspace-api/media/dvb/video-play.rst b/Documentation/userspace-api/media/dvb/video-play.rst
index 71db54d840cb..35ac8b98fdbf 100644
--- a/Documentation/userspace-api/media/dvb/video-play.rst
+++ b/Documentation/userspace-api/media/dvb/video-play.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.video
.. _VIDEO_PLAY:
@@ -16,9 +17,9 @@ VIDEO_PLAY
Synopsis
--------
-.. c:function:: int ioctl(fd, VIDEO_PLAY)
- :name: VIDEO_PLAY
+.. c:macro:: VIDEO_PLAY
+``int ioctl(fd, VIDEO_PLAY)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -40,7 +40,6 @@ Arguments
- Equals VIDEO_PLAY for this command.
-
Description
-----------
@@ -50,7 +49,6 @@ V4L2 :ref:`VIDIOC_DECODER_CMD` instead.
This ioctl call asks the Video Device to start playing a video stream
from the selected source.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/video-select-source.rst b/Documentation/userspace-api/media/dvb/video-select-source.rst
index 2e4ee53fa155..929a20985d53 100644
--- a/Documentation/userspace-api/media/dvb/video-select-source.rst
+++ b/Documentation/userspace-api/media/dvb/video-select-source.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.video
.. _VIDEO_SELECT_SOURCE:
@@ -16,9 +17,9 @@ VIDEO_SELECT_SOURCE
Synopsis
--------
-.. c:function:: int ioctl(fd, VIDEO_SELECT_SOURCE, video_stream_source_t source)
- :name: VIDEO_SELECT_SOURCE
+.. c:macro:: VIDEO_SELECT_SOURCE
+``int ioctl(fd, VIDEO_SELECT_SOURCE, video_stream_source_t source)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -46,7 +46,6 @@ Arguments
- Indicates which source shall be used for the Video stream.
-
Description
-----------
diff --git a/Documentation/userspace-api/media/dvb/video-set-blank.rst b/Documentation/userspace-api/media/dvb/video-set-blank.rst
index 5454fe7905bd..70249a6ba125 100644
--- a/Documentation/userspace-api/media/dvb/video-set-blank.rst
+++ b/Documentation/userspace-api/media/dvb/video-set-blank.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.video
.. _VIDEO_SET_BLANK:
@@ -16,9 +17,9 @@ VIDEO_SET_BLANK
Synopsis
--------
-.. c:function:: int ioctl(fd, VIDEO_SET_BLANK, boolean mode)
- :name: VIDEO_SET_BLANK
+.. c:macro:: VIDEO_SET_BLANK
+``int ioctl(fd, VIDEO_SET_BLANK, boolean mode)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -51,13 +51,11 @@ Arguments
-
- FALSE: Show last decoded frame.
-
Description
-----------
This ioctl call asks the Video Device to blank out the picture.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/video-set-display-format.rst b/Documentation/userspace-api/media/dvb/video-set-display-format.rst
index ada6113e2f2d..1de4f40ae732 100644
--- a/Documentation/userspace-api/media/dvb/video-set-display-format.rst
+++ b/Documentation/userspace-api/media/dvb/video-set-display-format.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.video
.. _VIDEO_SET_DISPLAY_FORMAT:
@@ -16,9 +17,9 @@ VIDEO_SET_DISPLAY_FORMAT
Synopsis
--------
-.. c:function:: int ioctl(fd, VIDEO_SET_DISPLAY_FORMAT)
- :name: VIDEO_SET_DISPLAY_FORMAT
+.. c:macro:: VIDEO_SET_DISPLAY_FORMAT
+``int ioctl(fd, VIDEO_SET_DISPLAY_FORMAT)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -46,14 +46,12 @@ Arguments
- Selects the video format to be used.
-
Description
-----------
This ioctl call asks the Video Device to select the video format to be
applied by the MPEG chip on the video.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/video-set-format.rst b/Documentation/userspace-api/media/dvb/video-set-format.rst
index 758a5d1642ab..bb64e37ae081 100644
--- a/Documentation/userspace-api/media/dvb/video-set-format.rst
+++ b/Documentation/userspace-api/media/dvb/video-set-format.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.video
.. _VIDEO_SET_FORMAT:
@@ -16,9 +17,9 @@ VIDEO_SET_FORMAT
Synopsis
--------
-.. c:function:: int ioctl(fd, VIDEO_SET_FORMAT, video_format_t format)
- :name: VIDEO_SET_FORMAT
+.. c:macro:: VIDEO_SET_FORMAT
+``int ioctl(fd, VIDEO_SET_FORMAT, video_format_t format)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -46,7 +46,6 @@ Arguments
- video format of TV as defined in section ??.
-
Description
-----------
@@ -72,12 +71,10 @@ appropriately. The generic error codes are described at the
:ref:`Generic Error Codes <gen-errors>` chapter.
-
.. flat-table::
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- ``EINVAL``
diff --git a/Documentation/userspace-api/media/dvb/video-set-streamtype.rst b/Documentation/userspace-api/media/dvb/video-set-streamtype.rst
index f3a99858b1db..1f31c048bdbc 100644
--- a/Documentation/userspace-api/media/dvb/video-set-streamtype.rst
+++ b/Documentation/userspace-api/media/dvb/video-set-streamtype.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.video
.. _VIDEO_SET_STREAMTYPE:
@@ -16,9 +17,9 @@ VIDEO_SET_STREAMTYPE
Synopsis
--------
-.. c:function:: int ioctl(fd, VIDEO_SET_STREAMTYPE, int type)
- :name: VIDEO_SET_STREAMTYPE
+.. c:macro:: VIDEO_SET_STREAMTYPE
+``int ioctl(fd, VIDEO_SET_STREAMTYPE, int type)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -46,7 +46,6 @@ Arguments
- stream type
-
Description
-----------
@@ -54,7 +53,6 @@ This ioctl tells the driver which kind of stream to expect being written
to it. If this call is not used the default of video PES is used. Some
drivers might not support this call and always expect PES.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/video-slowmotion.rst b/Documentation/userspace-api/media/dvb/video-slowmotion.rst
index 2ccb84d6a68b..1478fcc30cb8 100644
--- a/Documentation/userspace-api/media/dvb/video-slowmotion.rst
+++ b/Documentation/userspace-api/media/dvb/video-slowmotion.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.video
.. _VIDEO_SLOWMOTION:
@@ -16,9 +17,9 @@ VIDEO_SLOWMOTION
Synopsis
--------
-.. c:function:: int ioctl(fd, VIDEO_SLOWMOTION, int nFrames)
- :name: VIDEO_SLOWMOTION
+.. c:macro:: VIDEO_SLOWMOTION
+``int ioctl(fd, VIDEO_SLOWMOTION, int nFrames)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -46,7 +46,6 @@ Arguments
- The number of times to repeat each frame.
-
Description
-----------
@@ -54,7 +53,6 @@ This ioctl call asks the video device to repeat decoding frames N number
of times. This call can only be used if VIDEO_SOURCE_MEMORY is
selected.
-
Return Value
------------
@@ -63,12 +61,10 @@ appropriately. The generic error codes are described at the
:ref:`Generic Error Codes <gen-errors>` chapter.
-
.. flat-table::
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- ``EPERM``
diff --git a/Documentation/userspace-api/media/dvb/video-stillpicture.rst b/Documentation/userspace-api/media/dvb/video-stillpicture.rst
index a04f9f3ed162..d25384222a20 100644
--- a/Documentation/userspace-api/media/dvb/video-stillpicture.rst
+++ b/Documentation/userspace-api/media/dvb/video-stillpicture.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.video
.. _VIDEO_STILLPICTURE:
@@ -16,9 +17,9 @@ VIDEO_STILLPICTURE
Synopsis
--------
-.. c:function:: int ioctl(fd, VIDEO_STILLPICTURE, struct video_still_picture *sp)
- :name: VIDEO_STILLPICTURE
+.. c:macro:: VIDEO_STILLPICTURE
+``int ioctl(fd, VIDEO_STILLPICTURE, struct video_still_picture *sp)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -46,7 +46,6 @@ Arguments
- Pointer to a location where an I-frame and size is stored.
-
Description
-----------
@@ -54,7 +53,6 @@ This ioctl call asks the Video Device to display a still picture
(I-frame). The input data shall contain an I-frame. If the pointer is
NULL, then the current displayed still picture is blanked.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/video-stop.rst b/Documentation/userspace-api/media/dvb/video-stop.rst
index 9318655dce23..96f61c5b48a2 100644
--- a/Documentation/userspace-api/media/dvb/video-stop.rst
+++ b/Documentation/userspace-api/media/dvb/video-stop.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.video
.. _VIDEO_STOP:
@@ -16,9 +17,9 @@ VIDEO_STOP
Synopsis
--------
-.. c:function:: int ioctl(fd, VIDEO_STOP, boolean mode)
- :name: VIDEO_STOP
+.. c:macro:: VIDEO_STOP
+``int ioctl(fd, VIDEO_STOP, boolean mode)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -56,7 +56,6 @@ Arguments
-
- FALSE: Show last decoded frame.
-
Description
-----------
@@ -67,7 +66,6 @@ This ioctl call asks the Video Device to stop playing the current
stream. Depending on the input parameter, the screen can be blanked out
or displaying the last decoded frame.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/dvb/video-try-command.rst b/Documentation/userspace-api/media/dvb/video-try-command.rst
index 430c36035329..79bf3dfb8a32 100644
--- a/Documentation/userspace-api/media/dvb/video-try-command.rst
+++ b/Documentation/userspace-api/media/dvb/video-try-command.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: DTV.video
.. _VIDEO_TRY_COMMAND:
@@ -16,9 +17,9 @@ VIDEO_TRY_COMMAND
Synopsis
--------
-.. c:function:: int ioctl(int fd, VIDEO_TRY_COMMAND, struct video_command *cmd)
- :name: VIDEO_TRY_COMMAND
+.. c:macro:: VIDEO_TRY_COMMAND
+``int ioctl(int fd, VIDEO_TRY_COMMAND, struct video_command *cmd)``
Arguments
---------
@@ -27,7 +28,6 @@ Arguments
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- int fd
@@ -46,7 +46,6 @@ Arguments
- Try a decoder command.
-
Description
-----------
@@ -59,7 +58,6 @@ subset of the ``v4l2_decoder_cmd`` struct, so refer to the
:ref:`VIDIOC_TRY_DECODER_CMD <VIDIOC_DECODER_CMD>` documentation
for more information.
-
Return Value
------------
diff --git a/Documentation/userspace-api/media/mediactl/media-func-close.rst b/Documentation/userspace-api/media/mediactl/media-func-close.rst
index ec571b34ce69..8ac2443e76c1 100644
--- a/Documentation/userspace-api/media/mediactl/media-func-close.rst
+++ b/Documentation/userspace-api/media/mediactl/media-func-close.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: MC
.. _media-func-close:
@@ -11,7 +12,6 @@ Name
media-close - Close a media device
-
Synopsis
========
@@ -19,16 +19,13 @@ Synopsis
#include <unistd.h>
-
.. c:function:: int close( int fd )
- :name: mc-close
Arguments
=========
``fd``
- File descriptor returned by :c:func:`open() <mc-open>`.
-
+ File descriptor returned by :c:func:`open()`.
Description
===========
@@ -36,11 +33,10 @@ Description
Closes the media device. Resources associated with the file descriptor
are freed. The device configuration remain unchanged.
-
Return Value
============
-:ref:`close() <media-func-close>` returns 0 on success. On error, -1 is returned, and
+:c:func:`close()` returns 0 on success. On error, -1 is returned, and
``errno`` is set appropriately. Possible error codes are:
EBADF
diff --git a/Documentation/userspace-api/media/mediactl/media-func-ioctl.rst b/Documentation/userspace-api/media/mediactl/media-func-ioctl.rst
index 35ed549bec0e..9e9a838f4795 100644
--- a/Documentation/userspace-api/media/mediactl/media-func-ioctl.rst
+++ b/Documentation/userspace-api/media/mediactl/media-func-ioctl.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: MC
.. _media-func-ioctl:
@@ -11,7 +12,6 @@ Name
media-ioctl - Control a media device
-
Synopsis
========
@@ -19,15 +19,13 @@ Synopsis
#include <sys/ioctl.h>
-
-.. c:function:: int ioctl( int fd, int request, void *argp )
- :name: mc-ioctl
+``int ioctl(int fd, int request, void *argp)``
Arguments
=========
``fd``
- File descriptor returned by :c:func:`open() <mc-open>`.
+ File descriptor returned by :c:func:`open()`.
``request``
Media ioctl request code as defined in the media.h header file, for
@@ -36,7 +34,6 @@ Arguments
``argp``
Pointer to a request-specific structure.
-
Description
===========
@@ -52,7 +49,6 @@ their parameters are located in the media.h header file. All media ioctl
requests, their respective function and parameters are specified in
:ref:`media-user-func`.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/mediactl/media-func-open.rst b/Documentation/userspace-api/media/mediactl/media-func-open.rst
index 2c2595157ea3..24487cb0a308 100644
--- a/Documentation/userspace-api/media/mediactl/media-func-open.rst
+++ b/Documentation/userspace-api/media/mediactl/media-func-open.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: MC
.. _media-func-open:
@@ -11,7 +12,6 @@ Name
media-open - Open a media device
-
Synopsis
========
@@ -19,9 +19,7 @@ Synopsis
#include <fcntl.h>
-
.. c:function:: int open( const char *device_name, int flags )
- :name: mc-open
Arguments
=========
@@ -33,11 +31,10 @@ Arguments
Open flags. Access mode must be either ``O_RDONLY`` or ``O_RDWR``.
Other flags have no effect.
-
Description
===========
-To open a media device applications call :ref:`open() <media-func-open>` with the
+To open a media device applications call :c:func:`open()` with the
desired device name. The function has no side effects; the device
configuration remain unchanged.
@@ -45,11 +42,10 @@ When the device is opened in read-only mode, attempts to modify its
configuration will result in an error, and ``errno`` will be set to
EBADF.
-
Return Value
============
-:ref:`open() <func-open>` returns the new file descriptor on success. On error,
+:c:func:`open()` returns the new file descriptor on success. On error,
-1 is returned, and ``errno`` is set appropriately. Possible error codes
are:
diff --git a/Documentation/userspace-api/media/mediactl/media-ioc-device-info.rst b/Documentation/userspace-api/media/mediactl/media-ioc-device-info.rst
index cde1ddfc0bfb..0c4c5d2cfcb2 100644
--- a/Documentation/userspace-api/media/mediactl/media-ioc-device-info.rst
+++ b/Documentation/userspace-api/media/mediactl/media-ioc-device-info.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: MC
.. _media_ioc_device_info:
@@ -11,24 +12,22 @@ Name
MEDIA_IOC_DEVICE_INFO - Query device information
-
Synopsis
========
-.. c:function:: int ioctl( int fd, MEDIA_IOC_DEVICE_INFO, struct media_device_info *argp )
- :name: MEDIA_IOC_DEVICE_INFO
+.. c:macro:: MEDIA_IOC_DEVICE_INFO
+``int ioctl(int fd, MEDIA_IOC_DEVICE_INFO, struct media_device_info *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <media-func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`media_device_info`.
-
Description
===========
@@ -38,7 +37,6 @@ a struct :c:type:`media_device_info`. The driver
fills the structure and returns the information to the application. The
ioctl never fails.
-
.. c:type:: media_device_info
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
@@ -48,7 +46,6 @@ ioctl never fails.
:stub-columns: 0
:widths: 1 1 2
-
* - char
- ``driver``\ [16]
- Name of the driver implementing the media API as a NUL-terminated
@@ -94,7 +91,6 @@ ioctl never fails.
- Reserved for future extensions. Drivers and applications must set
this array to zero.
-
The ``serial`` and ``bus_info`` fields can be used to distinguish
between multiple instances of otherwise identical hardware. The serial
number takes precedence when provided and can be assumed to be unique.
@@ -102,7 +98,6 @@ If the serial number is an empty string, the ``bus_info`` field can be
used instead. The ``bus_info`` field is guaranteed to be unique, but can
vary across reboots or device unplug/replug.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/mediactl/media-ioc-enum-entities.rst b/Documentation/userspace-api/media/mediactl/media-ioc-enum-entities.rst
index 93e35f198f47..92dd8ecd538c 100644
--- a/Documentation/userspace-api/media/mediactl/media-ioc-enum-entities.rst
+++ b/Documentation/userspace-api/media/mediactl/media-ioc-enum-entities.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: MC
.. _media_ioc_enum_entities:
@@ -11,24 +12,22 @@ Name
MEDIA_IOC_ENUM_ENTITIES - Enumerate entities and their properties
-
Synopsis
========
-.. c:function:: int ioctl( int fd, MEDIA_IOC_ENUM_ENTITIES, struct media_entity_desc *argp )
- :name: MEDIA_IOC_ENUM_ENTITIES
+.. c:macro:: MEDIA_IOC_ENUM_ENTITIES
+``int ioctl(int fd, MEDIA_IOC_ENUM_ENTITIES, struct media_entity_desc *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <media-func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`media_entity_desc`.
-
Description
===========
@@ -49,7 +48,6 @@ Entity IDs can be non-contiguous. Applications must *not* try to
enumerate entities by calling MEDIA_IOC_ENUM_ENTITIES with increasing
id's until they get an error.
-
.. c:type:: media_entity_desc
.. tabularcolumns:: |p{1.5cm}|p{1.7cm}|p{1.6cm}|p{1.5cm}|p{11.2cm}|
@@ -136,7 +134,6 @@ id's until they get an error.
* - }
-
-
Return Value
============
diff --git a/Documentation/userspace-api/media/mediactl/media-ioc-enum-links.rst b/Documentation/userspace-api/media/mediactl/media-ioc-enum-links.rst
index f3e94c7b5dc3..3bc98a6a2ec5 100644
--- a/Documentation/userspace-api/media/mediactl/media-ioc-enum-links.rst
+++ b/Documentation/userspace-api/media/mediactl/media-ioc-enum-links.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: MC
.. _media_ioc_enum_links:
@@ -11,24 +12,22 @@ Name
MEDIA_IOC_ENUM_LINKS - Enumerate all pads and links for a given entity
-
Synopsis
========
-.. c:function:: int ioctl( int fd, MEDIA_IOC_ENUM_LINKS, struct media_links_enum *argp )
- :name: MEDIA_IOC_ENUM_LINKS
+.. c:macro:: MEDIA_IOC_ENUM_LINKS
+``int ioctl(int fd, MEDIA_IOC_ENUM_LINKS, struct media_links_enum *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <media-func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`media_links_enum`.
-
Description
===========
@@ -53,7 +52,6 @@ outbound links can be retrieved with :ref:`MEDIA_IOC_ENUM_ENTITIES`.
Only forward links that originate at one of the entity's source pads are
returned during the enumeration process.
-
.. c:type:: media_links_enum
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
@@ -82,7 +80,6 @@ returned during the enumeration process.
- Reserved for future extensions. Drivers and applications must set
the array to zero.
-
.. c:type:: media_pad_desc
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
@@ -110,7 +107,6 @@ returned during the enumeration process.
the array to zero.
-
.. c:type:: media_link_desc
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
@@ -137,7 +133,6 @@ returned during the enumeration process.
- Reserved for future extensions. Drivers and applications must set
the array to zero.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/mediactl/media-ioc-g-topology.rst b/Documentation/userspace-api/media/mediactl/media-ioc-g-topology.rst
index 9b7d2296b7fd..8f8b3b586edd 100644
--- a/Documentation/userspace-api/media/mediactl/media-ioc-g-topology.rst
+++ b/Documentation/userspace-api/media/mediactl/media-ioc-g-topology.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: MC
.. _media_ioc_g_topology:
@@ -11,24 +12,22 @@ Name
MEDIA_IOC_G_TOPOLOGY - Enumerate the graph topology and graph element properties
-
Synopsis
========
-.. c:function:: int ioctl( int fd, MEDIA_IOC_G_TOPOLOGY, struct media_v2_topology *argp )
- :name: MEDIA_IOC_G_TOPOLOGY
+.. c:macro:: MEDIA_IOC_G_TOPOLOGY
+``int ioctl(int fd, MEDIA_IOC_G_TOPOLOGY, struct media_v2_topology *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <media-func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`media_v2_topology`.
-
Description
===========
@@ -120,7 +119,6 @@ desired arrays with the media graph elements.
converted to a 64-bits integer. It can be zero. if zero, the ioctl
won't store the links. It will just update ``num_links``
-
.. tabularcolumns:: |p{1.6cm}|p{3.2cm}|p{12.7cm}|
.. c:type:: media_v2_entity
@@ -158,7 +156,6 @@ desired arrays with the media graph elements.
- Reserved for future extensions. Drivers and applications must set
this array to zero.
-
.. tabularcolumns:: |p{1.6cm}|p{3.2cm}|p{12.7cm}|
.. c:type:: media_v2_interface
@@ -192,7 +189,6 @@ desired arrays with the media graph elements.
- Used only for device node interfaces. See
:c:type:`media_v2_intf_devnode` for details.
-
.. tabularcolumns:: |p{1.6cm}|p{3.2cm}|p{12.7cm}|
.. c:type:: media_v2_intf_devnode
@@ -245,7 +241,6 @@ desired arrays with the media graph elements.
- Reserved for future extensions. Drivers and applications must set
this array to zero.
-
.. tabularcolumns:: |p{1.6cm}|p{3.2cm}|p{12.7cm}|
.. c:type:: media_v2_link
@@ -282,7 +277,6 @@ desired arrays with the media graph elements.
- Reserved for future extensions. Drivers and applications must set
this array to zero.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/mediactl/media-ioc-request-alloc.rst b/Documentation/userspace-api/media/mediactl/media-ioc-request-alloc.rst
index ea05ff0c5382..9195b4b8bf20 100644
--- a/Documentation/userspace-api/media/mediactl/media-ioc-request-alloc.rst
+++ b/Documentation/userspace-api/media/mediactl/media-ioc-request-alloc.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GPL-2.0 OR GFDL-1.1-no-invariants-or-later
+.. c:namespace:: MC
.. _media_ioc_request_alloc:
@@ -11,24 +12,22 @@ Name
MEDIA_IOC_REQUEST_ALLOC - Allocate a request
-
Synopsis
========
-.. c:function:: int ioctl( int fd, MEDIA_IOC_REQUEST_ALLOC, int *argp )
- :name: MEDIA_IOC_REQUEST_ALLOC
+.. c:macro:: MEDIA_IOC_REQUEST_ALLOC
+``int ioctl(int fd, MEDIA_IOC_REQUEST_ALLOC, int *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <media-func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to an integer.
-
Description
===========
@@ -51,7 +50,7 @@ Finally, the file descriptor can be :ref:`polled <request-func-poll>` to wait
for the request to complete.
The request will remain allocated until all the file descriptors associated
-with it are closed by :ref:`close() <request-func-close>` and the driver no
+with it are closed by :c:func:`close()` and the driver no
longer uses the request internally. See also
:ref:`here <media-request-life-time>` for more information.
diff --git a/Documentation/userspace-api/media/mediactl/media-ioc-setup-link.rst b/Documentation/userspace-api/media/mediactl/media-ioc-setup-link.rst
index e2aa51015783..23208300cb61 100644
--- a/Documentation/userspace-api/media/mediactl/media-ioc-setup-link.rst
+++ b/Documentation/userspace-api/media/mediactl/media-ioc-setup-link.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: MC
.. _media_ioc_setup_link:
@@ -11,24 +12,22 @@ Name
MEDIA_IOC_SETUP_LINK - Modify the properties of a link
-
Synopsis
========
-.. c:function:: int ioctl( int fd, MEDIA_IOC_SETUP_LINK, struct media_link_desc *argp )
- :name: MEDIA_IOC_SETUP_LINK
+.. c:macro:: MEDIA_IOC_SETUP_LINK
+``int ioctl(int fd, MEDIA_IOC_SETUP_LINK, struct media_link_desc *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <media-func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`media_link_desc`.
-
Description
===========
@@ -53,7 +52,6 @@ non-dynamic link will return an ``EBUSY`` error code.
If the specified link can't be found the driver returns with an ``EINVAL``
error code.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/mediactl/media-request-ioc-queue.rst b/Documentation/userspace-api/media/mediactl/media-request-ioc-queue.rst
index ca1b33196242..04b33db2bb45 100644
--- a/Documentation/userspace-api/media/mediactl/media-request-ioc-queue.rst
+++ b/Documentation/userspace-api/media/mediactl/media-request-ioc-queue.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GPL-2.0 OR GFDL-1.1-no-invariants-or-later
+.. c:namespace:: MC
.. _media_request_ioc_queue:
@@ -11,13 +12,12 @@ Name
MEDIA_REQUEST_IOC_QUEUE - Queue a request
-
Synopsis
========
-.. c:function:: int ioctl( int request_fd, MEDIA_REQUEST_IOC_QUEUE )
- :name: MEDIA_REQUEST_IOC_QUEUE
+.. c:macro:: MEDIA_REQUEST_IOC_QUEUE
+``int ioctl(int request_fd, MEDIA_REQUEST_IOC_QUEUE)``
Arguments
=========
@@ -25,7 +25,6 @@ Arguments
``request_fd``
File descriptor returned by :ref:`MEDIA_IOC_REQUEST_ALLOC`.
-
Description
===========
diff --git a/Documentation/userspace-api/media/mediactl/media-request-ioc-reinit.rst b/Documentation/userspace-api/media/mediactl/media-request-ioc-reinit.rst
index cfd503bdef70..57567b87b985 100644
--- a/Documentation/userspace-api/media/mediactl/media-request-ioc-reinit.rst
+++ b/Documentation/userspace-api/media/mediactl/media-request-ioc-reinit.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GPL-2.0 OR GFDL-1.1-no-invariants-or-later
+.. c:namespace:: MC
.. _media_request_ioc_reinit:
@@ -11,13 +12,12 @@ Name
MEDIA_REQUEST_IOC_REINIT - Re-initialize a request
-
Synopsis
========
-.. c:function:: int ioctl( int request_fd, MEDIA_REQUEST_IOC_REINIT )
- :name: MEDIA_REQUEST_IOC_REINIT
+.. c:macro:: MEDIA_REQUEST_IOC_REINIT
+``int ioctl(int request_fd, MEDIA_REQUEST_IOC_REINIT)``
Arguments
=========
@@ -33,7 +33,7 @@ this request ioctl can be used to re-initialize a previously allocated
request.
Re-initializing a request will clear any existing data from the request.
-This avoids having to :ref:`close() <request-func-close>` a completed
+This avoids having to :c:func:`close()` a completed
request and allocate a new request. Instead the completed request can just
be re-initialized and it is ready to be used again.
diff --git a/Documentation/userspace-api/media/mediactl/request-api.rst b/Documentation/userspace-api/media/mediactl/request-api.rst
index c0fa4dbb2b28..6c4cbd9f08a5 100644
--- a/Documentation/userspace-api/media/mediactl/request-api.rst
+++ b/Documentation/userspace-api/media/mediactl/request-api.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GPL-2.0 OR GFDL-1.1-no-invariants-or-later
+.. c:namespace:: MC
.. _media-request-api:
@@ -93,7 +94,7 @@ regardless of whether a request is in use or not.
Setting the same control through a request and also directly can lead to
undefined behavior!
-User-space can :ref:`poll() <request-func-poll>` a request file descriptor in
+User-space can :c:func:`poll()` a request file descriptor in
order to wait until the request completes. A request is considered complete
once all its associated buffers are available for dequeuing and all the
associated controls have been updated with the values at the time of completion.
@@ -115,7 +116,7 @@ Recycling and Destruction
-------------------------
Finally, a completed request can either be discarded or be reused. Calling
-:ref:`close() <request-func-close>` on a request file descriptor will make
+:c:func:`close()` on a request file descriptor will make
that file descriptor unusable and the request will be freed once it is no
longer in use by the kernel. That is, if the request is queued and then the
file descriptor is closed, then it won't be freed until the driver completed
diff --git a/Documentation/userspace-api/media/mediactl/request-func-close.rst b/Documentation/userspace-api/media/mediactl/request-func-close.rst
index 04e00bb9defd..f4b8eb385ad7 100644
--- a/Documentation/userspace-api/media/mediactl/request-func-close.rst
+++ b/Documentation/userspace-api/media/mediactl/request-func-close.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GPL-2.0 OR GFDL-1.1-no-invariants-or-later
+.. c:namespace:: MC.request
.. _request-func-close:
@@ -11,7 +12,6 @@ Name
request-close - Close a request file descriptor
-
Synopsis
========
@@ -19,9 +19,7 @@ Synopsis
#include <unistd.h>
-
.. c:function:: int close( int fd )
- :name: req-close
Arguments
=========
@@ -29,7 +27,6 @@ Arguments
``fd``
File descriptor returned by :ref:`MEDIA_IOC_REQUEST_ALLOC`.
-
Description
===========
@@ -38,11 +35,10 @@ are freed once all file descriptors associated with the request are closed
and the driver has completed the request.
See :ref:`here <media-request-life-time>` for more information.
-
Return Value
============
-:ref:`close() <request-func-close>` returns 0 on success. On error, -1 is
+:c:func:`close()` returns 0 on success. On error, -1 is
returned, and ``errno`` is set appropriately. Possible error codes are:
EBADF
diff --git a/Documentation/userspace-api/media/mediactl/request-func-ioctl.rst b/Documentation/userspace-api/media/mediactl/request-func-ioctl.rst
index 1e1c5edb860c..4fb3d2ef32d1 100644
--- a/Documentation/userspace-api/media/mediactl/request-func-ioctl.rst
+++ b/Documentation/userspace-api/media/mediactl/request-func-ioctl.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GPL-2.0 OR GFDL-1.1-no-invariants-or-later
+.. c:namespace:: MC
.. _request-func-ioctl:
@@ -11,7 +12,6 @@ Name
request-ioctl - Control a request file descriptor
-
Synopsis
========
@@ -19,9 +19,7 @@ Synopsis
#include <sys/ioctl.h>
-
-.. c:function:: int ioctl( int fd, int cmd, void *argp )
- :name: req-ioctl
+``int ioctl(int fd, int cmd, void *argp)``
Arguments
=========
@@ -36,7 +34,6 @@ Arguments
``argp``
Pointer to a request-specific structure.
-
Description
===========
@@ -52,7 +49,6 @@ their parameters are located in the media.h header file. All request ioctl
commands, their respective function and parameters are specified in
:ref:`media-user-func`.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/mediactl/request-func-poll.rst b/Documentation/userspace-api/media/mediactl/request-func-poll.rst
index 92947213d3d5..ce0043dbe7da 100644
--- a/Documentation/userspace-api/media/mediactl/request-func-poll.rst
+++ b/Documentation/userspace-api/media/mediactl/request-func-poll.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GPL-2.0 OR GFDL-1.1-no-invariants-or-later
+.. c:namespace:: MC
.. _request-func-poll:
@@ -11,7 +12,6 @@ Name
request-poll - Wait for some event on a file descriptor
-
Synopsis
========
@@ -19,9 +19,7 @@ Synopsis
#include <sys/poll.h>
-
.. c:function:: int poll( struct pollfd *ufds, unsigned int nfds, int timeout )
- :name: request-poll
Arguments
=========
@@ -35,14 +33,13 @@ Arguments
``timeout``
Timeout to wait for events
-
Description
===========
-With the :c:func:`poll() <request-func-poll>` function applications can wait
+With the :c:func:`poll()` function applications can wait
for a request to complete.
-On success :c:func:`poll() <request-func-poll>` returns the number of file
+On success :c:func:`poll()` returns the number of file
descriptors that have been selected (that is, file descriptors for which the
``revents`` field of the respective struct :c:type:`pollfd`
is non-zero). Request file descriptor set the ``POLLPRI`` flag in ``revents``
@@ -53,11 +50,10 @@ set appropriately.
Attempting to poll for a request that is not yet queued will
set the ``POLLERR`` flag in ``revents``.
-
Return Value
============
-On success, :c:func:`poll() <request-func-poll>` returns the number of
+On success, :c:func:`poll()` returns the number of
structures which have non-zero ``revents`` fields, or zero if the call
timed out. On error -1 is returned, and the ``errno`` variable is set
appropriately:
diff --git a/Documentation/userspace-api/media/rc/lirc-get-features.rst b/Documentation/userspace-api/media/rc/lirc-get-features.rst
index 6846ae99848c..66a243dbd437 100644
--- a/Documentation/userspace-api/media/rc/lirc-get-features.rst
+++ b/Documentation/userspace-api/media/rc/lirc-get-features.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: RC
.. _lirc_get_features:
@@ -14,8 +15,9 @@ LIRC_GET_FEATURES - Get the underlying hardware device's features
Synopsis
========
-.. c:function:: int ioctl( int fd, LIRC_GET_FEATURES, __u32 *features)
- :name: LIRC_GET_FEATURES
+.. c:macro:: LIRC_GET_FEATURES
+
+``int ioctl(int fd, LIRC_GET_FEATURES, __u32 *features)``
Arguments
=========
@@ -26,11 +28,9 @@ Arguments
``features``
Bitmask with the LIRC features.
-
Description
===========
-
Get the underlying hardware device's features. If a driver does not
announce support of certain features, calling of the corresponding ioctls
is undefined.
@@ -184,7 +184,6 @@ LIRC features
Unused. Kept just to avoid breaking uAPI.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/rc/lirc-get-rec-mode.rst b/Documentation/userspace-api/media/rc/lirc-get-rec-mode.rst
index e8f397a87331..188478ed1233 100644
--- a/Documentation/userspace-api/media/rc/lirc-get-rec-mode.rst
+++ b/Documentation/userspace-api/media/rc/lirc-get-rec-mode.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: RC
.. _lirc_get_rec_mode:
.. _lirc_set_rec_mode:
@@ -15,11 +16,13 @@ LIRC_GET_REC_MODE/LIRC_SET_REC_MODE - Get/set current receive mode.
Synopsis
========
-.. c:function:: int ioctl( int fd, LIRC_GET_REC_MODE, __u32 *mode)
- :name: LIRC_GET_REC_MODE
+.. c:macro:: LIRC_GET_REC_MODE
-.. c:function:: int ioctl( int fd, LIRC_SET_REC_MODE, __u32 *mode)
- :name: LIRC_SET_REC_MODE
+``int ioctl(int fd, LIRC_GET_REC_MODE, __u32 *mode)``
+
+.. c:macro:: LIRC_SET_REC_MODE
+
+``int ioctl(int fd, LIRC_SET_REC_MODE, __u32 *mode)``
Arguments
=========
@@ -47,7 +50,6 @@ Return Value
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- ``ENODEV``
diff --git a/Documentation/userspace-api/media/rc/lirc-get-rec-resolution.rst b/Documentation/userspace-api/media/rc/lirc-get-rec-resolution.rst
index 3f08aa7c24a9..e29445c5ce16 100644
--- a/Documentation/userspace-api/media/rc/lirc-get-rec-resolution.rst
+++ b/Documentation/userspace-api/media/rc/lirc-get-rec-resolution.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: RC
.. _lirc_get_rec_resolution:
@@ -14,8 +15,9 @@ LIRC_GET_REC_RESOLUTION - Obtain the value of receive resolution, in microsecond
Synopsis
========
-.. c:function:: int ioctl( int fd, LIRC_GET_REC_RESOLUTION, __u32 *microseconds)
- :name: LIRC_GET_REC_RESOLUTION
+.. c:macro:: LIRC_GET_REC_RESOLUTION
+
+``int ioctl(int fd, LIRC_GET_REC_RESOLUTION, __u32 *microseconds)``
Arguments
=========
@@ -26,7 +28,6 @@ Arguments
``microseconds``
Resolution, in microseconds.
-
Description
===========
@@ -38,7 +39,6 @@ This ioctl returns the integer value with such resolution, with can be
used by userspace applications like lircd to automatically adjust the
tolerance value.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/rc/lirc-get-send-mode.rst b/Documentation/userspace-api/media/rc/lirc-get-send-mode.rst
index f93b30c92193..77472fb5608a 100644
--- a/Documentation/userspace-api/media/rc/lirc-get-send-mode.rst
+++ b/Documentation/userspace-api/media/rc/lirc-get-send-mode.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: RC
.. _lirc_get_send_mode:
.. _lirc_set_send_mode:
@@ -15,11 +16,13 @@ LIRC_GET_SEND_MODE/LIRC_SET_SEND_MODE - Get/set current transmit mode.
Synopsis
========
-.. c:function:: int ioctl( int fd, LIRC_GET_SEND_MODE, __u32 *mode )
- :name: LIRC_GET_SEND_MODE
+.. c:macro:: LIRC_GET_SEND_MODE
-.. c:function:: int ioctl( int fd, LIRC_SET_SEND_MODE, __u32 *mode )
- :name: LIRC_SET_SEND_MODE
+``int ioctl(int fd, LIRC_GET_SEND_MODE, __u32 *mode)``
+
+.. c:macro:: LIRC_SET_SEND_MODE
+
+``int ioctl(int fd, LIRC_SET_SEND_MODE, __u32 *mode)``
Arguments
=========
@@ -30,7 +33,6 @@ Arguments
``mode``
The mode used for transmitting.
-
Description
===========
@@ -44,14 +46,12 @@ modes the driver supports.
Return Value
============
-
.. tabularcolumns:: |p{2.5cm}|p{15.0cm}|
.. flat-table::
:header-rows: 0
:stub-columns: 0
-
- .. row 1
- ``ENODEV``
diff --git a/Documentation/userspace-api/media/rc/lirc-get-timeout.rst b/Documentation/userspace-api/media/rc/lirc-get-timeout.rst
index ec191a383d75..f5f3e06d6206 100644
--- a/Documentation/userspace-api/media/rc/lirc-get-timeout.rst
+++ b/Documentation/userspace-api/media/rc/lirc-get-timeout.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: RC
.. _lirc_get_min_timeout:
.. _lirc_get_max_timeout:
@@ -16,11 +17,13 @@ range for IR receive.
Synopsis
========
-.. c:function:: int ioctl( int fd, LIRC_GET_MIN_TIMEOUT, __u32 *timeout)
- :name: LIRC_GET_MIN_TIMEOUT
+.. c:macro:: LIRC_GET_MIN_TIMEOUT
-.. c:function:: int ioctl( int fd, LIRC_GET_MAX_TIMEOUT, __u32 *timeout)
- :name: LIRC_GET_MAX_TIMEOUT
+``int ioctl(int fd, LIRC_GET_MIN_TIMEOUT, __u32 *timeout)``
+
+.. c:macro:: LIRC_GET_MAX_TIMEOUT
+
+``int ioctl(int fd, LIRC_GET_MAX_TIMEOUT, __u32 *timeout)``
Arguments
=========
@@ -31,7 +34,6 @@ Arguments
``timeout``
Timeout, in microseconds.
-
Description
===========
@@ -47,7 +49,6 @@ that can be set.
both ioctls will return the same value even though the timeout
cannot be changed via :ref:`LIRC_SET_REC_TIMEOUT`.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/rc/lirc-read.rst b/Documentation/userspace-api/media/rc/lirc-read.rst
index b94a349bd99e..d589560214f4 100644
--- a/Documentation/userspace-api/media/rc/lirc-read.rst
+++ b/Documentation/userspace-api/media/rc/lirc-read.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: RC
.. _lirc-read:
@@ -11,7 +12,6 @@ Name
lirc-read - Read from a LIRC device
-
Synopsis
========
@@ -19,10 +19,7 @@ Synopsis
#include <unistd.h>
-
.. c:function:: ssize_t read( int fd, void *buf, size_t count )
- :name: lirc-read
-
Arguments
=========
@@ -39,9 +36,9 @@ Arguments
Description
===========
-:ref:`read() <lirc-read>` attempts to read up to ``count`` bytes from file
+:c:func:`read()` attempts to read up to ``count`` bytes from file
descriptor ``fd`` into the buffer starting at ``buf``. If ``count`` is zero,
-:ref:`read() <lirc-read>` returns zero and has no other results. If ``count``
+:c:func:`read()` returns zero and has no other results. If ``count``
is greater than ``SSIZE_MAX``, the result is unspecified.
The exact format of the data depends on what :ref:`lirc_modes` a driver
@@ -59,7 +56,6 @@ by hardware decoders. The :c:type:`rc_proto` member is set to the
used for transmission, and ``scancode`` to the decoded scancode,
and the ``keycode`` set to the keycode or ``KEY_RESERVED``.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/rc/lirc-set-measure-carrier-mode.rst b/Documentation/userspace-api/media/rc/lirc-set-measure-carrier-mode.rst
index 820d6bf28c1c..9bf9811a905a 100644
--- a/Documentation/userspace-api/media/rc/lirc-set-measure-carrier-mode.rst
+++ b/Documentation/userspace-api/media/rc/lirc-set-measure-carrier-mode.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: RC
.. _lirc_set_measure_carrier_mode:
@@ -14,8 +15,9 @@ LIRC_SET_MEASURE_CARRIER_MODE - enable or disable measure mode
Synopsis
========
-.. c:function:: int ioctl( int fd, LIRC_SET_MEASURE_CARRIER_MODE, __u32 *enable )
- :name: LIRC_SET_MEASURE_CARRIER_MODE
+.. c:macro:: LIRC_SET_MEASURE_CARRIER_MODE
+
+``int ioctl(int fd, LIRC_SET_MEASURE_CARRIER_MODE, __u32 *enable)``
Arguments
=========
@@ -27,7 +29,6 @@ Arguments
enable = 1 means enable measure mode, enable = 0 means disable measure
mode.
-
Description
===========
@@ -37,7 +38,6 @@ Enable or disable measure mode. If enabled, from the next key
press on, the driver will send ``LIRC_MODE2_FREQUENCY`` packets. By
default this should be turned off.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/rc/lirc-set-rec-carrier-range.rst b/Documentation/userspace-api/media/rc/lirc-set-rec-carrier-range.rst
index e33e6a320b7a..530bc223930a 100644
--- a/Documentation/userspace-api/media/rc/lirc-set-rec-carrier-range.rst
+++ b/Documentation/userspace-api/media/rc/lirc-set-rec-carrier-range.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: RC
.. _lirc_set_rec_carrier_range:
@@ -15,8 +16,9 @@ IR receive.
Synopsis
========
-.. c:function:: int ioctl( int fd, LIRC_SET_REC_CARRIER_RANGE, __u32 *frequency )
- :name: LIRC_SET_REC_CARRIER_RANGE
+.. c:macro:: LIRC_SET_REC_CARRIER_RANGE
+
+``int ioctl(int fd, LIRC_SET_REC_CARRIER_RANGE, __u32 *frequency)``
Arguments
=========
diff --git a/Documentation/userspace-api/media/rc/lirc-set-rec-carrier.rst b/Documentation/userspace-api/media/rc/lirc-set-rec-carrier.rst
index a6784d5e59c8..28c928f1cc14 100644
--- a/Documentation/userspace-api/media/rc/lirc-set-rec-carrier.rst
+++ b/Documentation/userspace-api/media/rc/lirc-set-rec-carrier.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: RC
.. _lirc_set_rec_carrier:
@@ -11,12 +12,12 @@ Name
LIRC_SET_REC_CARRIER - Set carrier used to modulate IR receive.
-
Synopsis
========
-.. c:function:: int ioctl( int fd, LIRC_SET_REC_CARRIER, __u32 *frequency )
- :name: LIRC_SET_REC_CARRIER
+.. c:macro:: LIRC_SET_REC_CARRIER
+
+``int ioctl(int fd, LIRC_SET_REC_CARRIER, __u32 *frequency)``
Arguments
=========
@@ -37,7 +38,6 @@ Set receive carrier used to modulate IR PWM pulses and spaces.
If called together with :ref:`LIRC_SET_REC_CARRIER_RANGE`, this ioctl
sets the upper bound frequency that will be recognized by the device.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/rc/lirc-set-rec-timeout-reports.rst b/Documentation/userspace-api/media/rc/lirc-set-rec-timeout-reports.rst
index 55be65df7d5a..83e7155c5796 100644
--- a/Documentation/userspace-api/media/rc/lirc-set-rec-timeout-reports.rst
+++ b/Documentation/userspace-api/media/rc/lirc-set-rec-timeout-reports.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: RC
.. _lirc_set_rec_timeout_reports:
@@ -14,8 +15,9 @@ LIRC_SET_REC_TIMEOUT_REPORTS - enable or disable timeout reports for IR receive
Synopsis
========
-.. c:function:: int ioctl( int fd, LIRC_SET_REC_TIMEOUT_REPORTS, __u32 *enable )
- :name: LIRC_SET_REC_TIMEOUT_REPORTS
+.. c:macro:: LIRC_SET_REC_TIMEOUT_REPORTS
+
+``int ioctl(int fd, LIRC_SET_REC_TIMEOUT_REPORTS, __u32 *enable)``
Arguments
=========
@@ -27,7 +29,6 @@ Arguments
enable = 1 means enable timeout report, enable = 0 means disable timeout
reports.
-
Description
===========
@@ -40,7 +41,6 @@ should be turned off.
This ioctl is only valid for :ref:`LIRC_MODE_MODE2 <lirc-mode-mode2>`.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/rc/lirc-set-rec-timeout.rst b/Documentation/userspace-api/media/rc/lirc-set-rec-timeout.rst
index e91a0daecde6..8f3f9adf54ab 100644
--- a/Documentation/userspace-api/media/rc/lirc-set-rec-timeout.rst
+++ b/Documentation/userspace-api/media/rc/lirc-set-rec-timeout.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: RC
.. _lirc_set_rec_timeout:
.. _lirc_get_rec_timeout:
@@ -15,11 +16,13 @@ LIRC_GET_REC_TIMEOUT/LIRC_SET_REC_TIMEOUT - Get/set the integer value for IR ina
Synopsis
========
-.. c:function:: int ioctl( int fd, LIRC_GET_REC_TIMEOUT, __u32 *timeout )
- :name: LIRC_GET_REC_TIMEOUT
+.. c:macro:: LIRC_GET_REC_TIMEOUT
-.. c:function:: int ioctl( int fd, LIRC_SET_REC_TIMEOUT, __u32 *timeout )
- :name: LIRC_SET_REC_TIMEOUT
+``int ioctl(int fd, LIRC_GET_REC_TIMEOUT, __u32 *timeout)``
+
+.. c:macro:: LIRC_SET_REC_TIMEOUT
+
+``int ioctl(int fd, LIRC_SET_REC_TIMEOUT, __u32 *timeout)``
Arguments
=========
@@ -30,7 +33,6 @@ Arguments
``timeout``
Timeout, in microseconds.
-
Description
===========
@@ -45,7 +47,6 @@ given value should be set.
The range of supported timeout is given by :ref:`LIRC_GET_MIN_TIMEOUT`.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/rc/lirc-set-send-carrier.rst b/Documentation/userspace-api/media/rc/lirc-set-send-carrier.rst
index e199aac7d8e0..e3810ba58746 100644
--- a/Documentation/userspace-api/media/rc/lirc-set-send-carrier.rst
+++ b/Documentation/userspace-api/media/rc/lirc-set-send-carrier.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: RC
.. _lirc_set_send_carrier:
@@ -11,12 +12,12 @@ Name
LIRC_SET_SEND_CARRIER - Set send carrier used to modulate IR TX.
-
Synopsis
========
-.. c:function:: int ioctl( int fd, LIRC_SET_SEND_CARRIER, __u32 *frequency )
- :name: LIRC_SET_SEND_CARRIER
+.. c:macro:: LIRC_SET_SEND_CARRIER
+
+``int ioctl(int fd, LIRC_SET_SEND_CARRIER, __u32 *frequency)``
Arguments
=========
@@ -32,7 +33,6 @@ Description
Set send carrier used to modulate IR PWM pulses and spaces.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/rc/lirc-set-send-duty-cycle.rst b/Documentation/userspace-api/media/rc/lirc-set-send-duty-cycle.rst
index a9074f4fb058..52a072529af9 100644
--- a/Documentation/userspace-api/media/rc/lirc-set-send-duty-cycle.rst
+++ b/Documentation/userspace-api/media/rc/lirc-set-send-duty-cycle.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: RC
.. _lirc_set_send_duty_cycle:
@@ -15,8 +16,9 @@ IR transmit.
Synopsis
========
-.. c:function:: int ioctl( int fd, LIRC_SET_SEND_DUTY_CYCLE, __u32 *duty_cycle)
- :name: LIRC_SET_SEND_DUTY_CYCLE
+.. c:macro:: LIRC_SET_SEND_DUTY_CYCLE
+
+``int ioctl(int fd, LIRC_SET_SEND_DUTY_CYCLE, __u32 *duty_cycle)``
Arguments
=========
@@ -28,7 +30,6 @@ Arguments
Duty cicle, describing the pulse width in percent (from 1 to 99) of
the total cycle. Values 0 and 100 are reserved.
-
Description
===========
@@ -38,7 +39,6 @@ Currently, no special meaning is defined for 0 or 100, but this
could be used to switch off carrier generation in the future, so
these values should be reserved.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/rc/lirc-set-transmitter-mask.rst b/Documentation/userspace-api/media/rc/lirc-set-transmitter-mask.rst
index 1f5527427281..68f4cc2e3ae3 100644
--- a/Documentation/userspace-api/media/rc/lirc-set-transmitter-mask.rst
+++ b/Documentation/userspace-api/media/rc/lirc-set-transmitter-mask.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: RC
.. _lirc_set_transmitter_mask:
@@ -14,8 +15,9 @@ LIRC_SET_TRANSMITTER_MASK - Enables send codes on a given set of transmitters
Synopsis
========
-.. c:function:: int ioctl( int fd, LIRC_SET_TRANSMITTER_MASK, __u32 *mask )
- :name: LIRC_SET_TRANSMITTER_MASK
+.. c:macro:: LIRC_SET_TRANSMITTER_MASK
+
+``int ioctl(int fd, LIRC_SET_TRANSMITTER_MASK, __u32 *mask)``
Arguments
=========
@@ -26,7 +28,6 @@ Arguments
``mask``
Mask with channels to enable tx. Channel 0 is the least significant bit.
-
Description
===========
@@ -42,7 +43,6 @@ When an invalid bit mask is given, i.e. a bit is set, even though the device
does not have so many transitters, then this ioctl returns the number of
available transitters and does nothing otherwise.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/rc/lirc-set-wideband-receiver.rst b/Documentation/userspace-api/media/rc/lirc-set-wideband-receiver.rst
index 2c43b620b3f3..be5321c4a91f 100644
--- a/Documentation/userspace-api/media/rc/lirc-set-wideband-receiver.rst
+++ b/Documentation/userspace-api/media/rc/lirc-set-wideband-receiver.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: RC
.. _lirc_set_wideband_receiver:
@@ -14,8 +15,9 @@ LIRC_SET_WIDEBAND_RECEIVER - enable wide band receiver.
Synopsis
========
-.. c:function:: int ioctl( int fd, LIRC_SET_WIDEBAND_RECEIVER, __u32 *enable )
- :name: LIRC_SET_WIDEBAND_RECEIVER
+.. c:macro:: LIRC_SET_WIDEBAND_RECEIVER
+
+``int ioctl(int fd, LIRC_SET_WIDEBAND_RECEIVER, __u32 *enable)``
Arguments
=========
@@ -27,7 +29,6 @@ Arguments
enable = 1 means enable wideband receiver, enable = 0 means disable
wideband receiver.
-
Description
===========
@@ -47,7 +48,6 @@ reduced range of reception.
carrier reports. Trying to disable wide band receiver while carrier
reports are active will do nothing.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/rc/lirc-write.rst b/Documentation/userspace-api/media/rc/lirc-write.rst
index 421de2cfa4ca..c1c3230d4fd6 100644
--- a/Documentation/userspace-api/media/rc/lirc-write.rst
+++ b/Documentation/userspace-api/media/rc/lirc-write.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: RC
.. _lirc-write:
@@ -11,7 +12,6 @@ Name
lirc-write - Write to a LIRC device
-
Synopsis
========
@@ -19,9 +19,7 @@ Synopsis
#include <unistd.h>
-
.. c:function:: ssize_t write( int fd, void *buf, size_t count )
- :name: lirc-write
Arguments
=========
@@ -38,7 +36,7 @@ Arguments
Description
===========
-:ref:`write() <lirc-write>` writes up to ``count`` bytes to the device
+:c:func:`write()` writes up to ``count`` bytes to the device
referenced by the file descriptor ``fd`` from the buffer starting at
``buf``.
@@ -64,7 +62,6 @@ for the protocol or the scancode is not valid for the specified protocol,
``EINVAL`` is returned. The write function blocks until the scancode
is transmitted by the hardware.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/buffer.rst b/Documentation/userspace-api/media/v4l/buffer.rst
index 4f95496adc5b..7dbdfbb4a0a9 100644
--- a/Documentation/userspace-api/media/v4l/buffer.rst
+++ b/Documentation/userspace-api/media/v4l/buffer.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _buffer:
@@ -33,7 +34,6 @@ mem-to-mem devices is an exception to the rule: the timestamp source
flags are copied from the OUTPUT video buffer to the CAPTURE video
buffer.
-
Interactions between formats, controls and buffers
==================================================
@@ -152,7 +152,6 @@ based on the queried sizes (for instance by allocating a set of buffers large
enough for all the desired formats and controls, or by allocating separate set
of appropriately sized buffers for each use case).
-
.. c:type:: v4l2_buffer
struct v4l2_buffer
@@ -257,7 +256,7 @@ struct v4l2_buffer
``V4L2_MEMORY_MMAP`` this is the offset of the buffer from the
start of the device memory. The value is returned by the driver
and apart of serving as parameter to the
- :ref:`mmap() <func-mmap>` function not useful for applications.
+ :c:func:`mmap()` function not useful for applications.
See :ref:`mmap` for details
* - unsigned long
- ``userptr``
@@ -310,7 +309,6 @@ struct v4l2_buffer
given, then ``EINVAL`` will be returned.
-
.. c:type:: v4l2_plane
struct v4l2_plane
@@ -350,7 +348,7 @@ struct v4l2_plane
- ``mem_offset``
- When the memory type in the containing struct
:c:type:`v4l2_buffer` is ``V4L2_MEMORY_MMAP``, this
- is the value that should be passed to :ref:`mmap() <func-mmap>`,
+ is the value that should be passed to :c:func:`mmap()`,
similar to the ``offset`` field in struct
:c:type:`v4l2_buffer`.
* - unsigned long
@@ -384,7 +382,6 @@ struct v4l2_plane
applications.
-
.. c:type:: v4l2_buf_type
enum v4l2_buf_type
@@ -448,7 +445,6 @@ enum v4l2_buf_type
- Buffer for metadata output, see :ref:`metadata`.
-
.. _buffer-flags:
Buffer Flags
@@ -682,20 +678,6 @@ Buffer Flags
.. _memory-flags:
-Memory Consistency Flags
-========================
-
-.. tabularcolumns:: |p{7.0cm}|p{2.2cm}|p{8.3cm}|
-
-.. cssclass:: longtable
-
-.. flat-table::
- :header-rows: 0
- :stub-columns: 0
- :widths: 3 1 4
-
-.. c:type:: v4l2_memory
-
enum v4l2_memory
================
@@ -720,7 +702,6 @@ enum v4l2_memory
- The buffer is used for :ref:`DMA shared buffer <dmabuf>` I/O.
-
Timecodes
=========
@@ -729,7 +710,6 @@ The :c:type:`v4l2_buffer_timecode` structure is designed to hold a
(struct :c:type:`timeval` timestamps are stored in the struct
:c:type:`v4l2_buffer` ``timestamp`` field.)
-
.. c:type:: v4l2_timecode
struct v4l2_timecode
@@ -766,7 +746,6 @@ struct v4l2_timecode
- The "user group" bits from the timecode.
-
.. _timecode-type:
Timecode Types
@@ -796,7 +775,6 @@ Timecode Types
-
-
.. _timecode-flags:
Timecode Flags
diff --git a/Documentation/userspace-api/media/v4l/dev-capture.rst b/Documentation/userspace-api/media/v4l/dev-capture.rst
index 5ea1ffe71fa6..fe58fd450e2f 100644
--- a/Documentation/userspace-api/media/v4l/dev-capture.rst
+++ b/Documentation/userspace-api/media/v4l/dev-capture.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _capture:
@@ -19,7 +20,6 @@ device.
.. note:: The same device file names are used for video output devices.
-
Querying Capabilities
=====================
@@ -34,7 +34,6 @@ functions they may also support the :ref:`video overlay <overlay>`
streaming I/O methods must be supported. Tuners and audio inputs are
optional.
-
Supplemental Functions
======================
@@ -45,7 +44,6 @@ Video capture devices shall support :ref:`audio input <audio>`,
:ref:`video input <video>` ioctls must be supported by all video
capture devices.
-
Image Format Negotiation
========================
@@ -55,7 +53,7 @@ capture, the latter how images are stored in memory, i. e. in RGB or YUV
format, the number of bits per pixel or width and height. Together they
also define how images are scaled in the process.
-As usual these parameters are *not* reset at :ref:`open() <func-open>`
+As usual these parameters are *not* reset at :c:func:`open()`
time to permit Unix tool chains, programming a device and then reading
from it as if it was a plain file. Well written V4L2 applications ensure
they really get what they want, including cropping and scaling.
@@ -95,7 +93,6 @@ and :ref:`VIDIOC_S_FMT <VIDIOC_G_FMT>` ioctl, even if :ref:`VIDIOC_S_FMT <VIDIOC
requests and always returns default parameters as :ref:`VIDIOC_G_FMT <VIDIOC_G_FMT>` does.
:ref:`VIDIOC_TRY_FMT <VIDIOC_G_FMT>` is optional.
-
Reading Images
==============
diff --git a/Documentation/userspace-api/media/v4l/dev-output.rst b/Documentation/userspace-api/media/v4l/dev-output.rst
index 2315faf61aaf..eadcb4aa813b 100644
--- a/Documentation/userspace-api/media/v4l/dev-output.rst
+++ b/Documentation/userspace-api/media/v4l/dev-output.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _output:
@@ -18,7 +19,6 @@ device.
.. note:: The same device file names are used also for video capture devices.
-
Querying Capabilities
=====================
@@ -32,7 +32,6 @@ functions they may also support the :ref:`raw VBI output <raw-vbi>`
streaming I/O methods must be supported. Modulators and audio outputs
are optional.
-
Supplemental Functions
======================
@@ -43,7 +42,6 @@ Video output devices shall support :ref:`audio output <audio>`,
:ref:`video output <video>` ioctls must be supported by all video
output devices.
-
Image Format Negotiation
========================
@@ -53,7 +51,7 @@ the latter how images are stored in memory, i. e. in RGB or YUV format,
the number of bits per pixel or width and height. Together they also
define how images are scaled in the process.
-As usual these parameters are *not* reset at :ref:`open() <func-open>`
+As usual these parameters are *not* reset at :c:func:`open()`
time to permit Unix tool chains, programming a device and then writing
to it as if it was a plain file. Well written V4L2 applications ensure
they really get what they want, including cropping and scaling.
@@ -92,7 +90,6 @@ and :ref:`VIDIOC_S_FMT <VIDIOC_G_FMT>` ioctl, even if :ref:`VIDIOC_S_FMT <VIDIOC
requests and always returns default parameters as :ref:`VIDIOC_G_FMT <VIDIOC_G_FMT>` does.
:ref:`VIDIOC_TRY_FMT <VIDIOC_G_FMT>` is optional.
-
Writing Images
==============
diff --git a/Documentation/userspace-api/media/v4l/dev-raw-vbi.rst b/Documentation/userspace-api/media/v4l/dev-raw-vbi.rst
index bb52f85a619c..3f43a01ba938 100644
--- a/Documentation/userspace-api/media/v4l/dev-raw-vbi.rst
+++ b/Documentation/userspace-api/media/v4l/dev-raw-vbi.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _raw-vbi:
@@ -32,7 +33,6 @@ applications must call the :ref:`VIDIOC_S_FMT <VIDIOC_G_FMT>` ioctl.
Accessed as ``/dev/vbi``, raw VBI capturing or output is the default
device function.
-
Querying Capabilities
=====================
@@ -44,7 +44,6 @@ in the ``capabilities`` field of struct
read/write, streaming or asynchronous I/O methods must be supported. VBI
devices may or may not have a tuner or modulator.
-
Supplemental Functions
======================
@@ -53,7 +52,6 @@ VBI devices shall support :ref:`video input or output <video>`,
ioctls as needed. The :ref:`video standard <standard>` ioctls provide
information vital to program a VBI device, therefore must be supported.
-
Raw VBI Format Negotiation
==========================
@@ -62,7 +60,7 @@ frequency. To properly interpret the data V4L2 specifies an ioctl to
query the sampling parameters. Moreover, to allow for some flexibility
applications can also suggest different parameters.
-As usual these parameters are *not* reset at :ref:`open() <func-open>`
+As usual these parameters are *not* reset at :c:func:`open()`
time to permit Unix tool chains, programming a device and then reading
from it as if it was a plain file. Well written V4L2 applications should
always ensure they really get what they want, requesting reasonable
@@ -91,8 +89,8 @@ happen for instance when the video and VBI areas to capture would
overlap, or when the driver supports multiple opens and another process
already requested VBI capturing or output. Anyway, applications must
expect other resource allocation points which may return ``EBUSY``, at the
-:ref:`VIDIOC_STREAMON` ioctl and the first :ref:`read() <func-read>`
-, :ref:`write() <func-write>` and :ref:`select() <func-select>` calls.
+:ref:`VIDIOC_STREAMON` ioctl and the first :c:func:`read()`
+, :c:func:`write()` and :c:func:`select()` calls.
VBI devices must implement both the :ref:`VIDIOC_G_FMT <VIDIOC_G_FMT>` and
:ref:`VIDIOC_S_FMT <VIDIOC_G_FMT>` ioctl, even if :ref:`VIDIOC_S_FMT <VIDIOC_G_FMT>` ignores all requests
@@ -182,7 +180,6 @@ and always returns default parameters as :ref:`VIDIOC_G_FMT <VIDIOC_G_FMT>` does
- This array is reserved for future extensions. Drivers and
applications must set it to zero.
-
.. tabularcolumns:: |p{4.4cm}|p{1.5cm}|p{11.6cm}|
.. _vbifmt-flags:
@@ -218,7 +215,6 @@ and always returns default parameters as :ref:`VIDIOC_G_FMT <VIDIOC_G_FMT>` does
non-zero.
-
.. _vbi-hsync:
.. kernel-figure:: vbi_hsync.svg
@@ -227,7 +223,6 @@ and always returns default parameters as :ref:`VIDIOC_G_FMT <VIDIOC_G_FMT>` does
**Figure 4.1. Line synchronization**
-
.. _vbi-525:
.. kernel-figure:: vbi_525.svg
@@ -251,7 +246,6 @@ negotiation, or after switching the video standard which may invalidate
the negotiated VBI parameters, should be refused by the driver. A format
change during active I/O is not permitted.
-
Reading and writing VBI images
==============================
@@ -261,7 +255,6 @@ consisting of two fields of VBI images immediately following in memory.
The total size of a frame computes as follows:
-
.. code-block:: c
(count[0] + count[1]) * samples_per_line * sample size in bytes
@@ -276,8 +269,8 @@ The latter bears the possibility of synchronizing video and VBI data by
using buffer timestamps.
Remember the :ref:`VIDIOC_STREAMON <VIDIOC_STREAMON>` ioctl and the
-first :ref:`read() <func-read>`, :ref:`write() <func-write>` and
-:ref:`select() <func-select>` call can be resource allocation
+first :c:func:`read()`, :c:func:`write()` and
+:c:func:`select()` call can be resource allocation
points returning an ``EBUSY`` error code if the required hardware resources
are temporarily unavailable, for example the device is already in use by
another process.
diff --git a/Documentation/userspace-api/media/v4l/dev-rds.rst b/Documentation/userspace-api/media/v4l/dev-rds.rst
index 463726ba46d7..207216d5e6a5 100644
--- a/Documentation/userspace-api/media/v4l/dev-rds.rst
+++ b/Documentation/userspace-api/media/v4l/dev-rds.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _rds:
@@ -28,7 +29,6 @@ The RDS interface does not support this format. Should support for MMBS
the linux-media mailing list:
`https://linuxtv.org/lists.php <https://linuxtv.org/lists.php>`__.
-
Querying Capabilities
=====================
@@ -68,31 +68,27 @@ like program identification codes and radio text, the flag
:ref:`Writing RDS data <writing-rds-data>` and
:ref:`FM Transmitter Control Reference <fm-tx-controls>`.
-
.. _reading-rds-data:
Reading RDS data
================
RDS data can be read from the radio device with the
-:ref:`read() <func-read>` function. The data is packed in groups of
+:c:func:`read()` function. The data is packed in groups of
three bytes.
-
.. _writing-rds-data:
Writing RDS data
================
RDS data can be written to the radio device with the
-:ref:`write() <func-write>` function. The data is packed in groups of
+:c:func:`write()` function. The data is packed in groups of
three bytes, as follows:
-
RDS datastructures
==================
-
.. c:type:: v4l2_rds_data
.. tabularcolumns:: |p{2.5cm}|p{2.5cm}|p{12.5cm}|
@@ -113,7 +109,6 @@ RDS datastructures
- Block description
-
.. _v4l2-rds-block:
.. tabularcolumns:: |p{2.9cm}|p{14.6cm}|
@@ -136,7 +131,6 @@ RDS datastructures
reception of this block.
-
.. _v4l2-rds-block-codes:
.. tabularcolumns:: |p{6.4cm}|p{2.0cm}|p{1.2cm}|p{7.9cm}|
diff --git a/Documentation/userspace-api/media/v4l/dev-sliced-vbi.rst b/Documentation/userspace-api/media/v4l/dev-sliced-vbi.rst
index 807751f305fb..f0df144c9f63 100644
--- a/Documentation/userspace-api/media/v4l/dev-sliced-vbi.rst
+++ b/Documentation/userspace-api/media/v4l/dev-sliced-vbi.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _sliced:
@@ -27,7 +28,6 @@ however the default function here is video capturing or output.
Different file descriptors must be used to pass raw and sliced VBI data
simultaneously, if this is supported by the driver.
-
Querying Capabilities
=====================
@@ -39,7 +39,6 @@ respectively, in the ``capabilities`` field of struct
read/write, streaming or asynchronous :ref:`I/O methods <io>` must be
supported. Sliced VBI devices may have a tuner or modulator.
-
Supplemental Functions
======================
@@ -49,7 +48,6 @@ capabilities, and they may support :ref:`control` ioctls.
The :ref:`video standard <standard>` ioctls provide information vital
to program a sliced VBI device, therefore must be supported.
-
.. _sliced-vbi-format-negotitation:
Sliced VBI Format Negotiation
@@ -96,9 +94,8 @@ at this point, it may return an ``EBUSY`` error code if the required
resources are temporarily unavailable. Other resource allocation points
which may return ``EBUSY`` can be the
:ref:`VIDIOC_STREAMON` ioctl and the first
-:ref:`read() <func-read>`, :ref:`write() <func-write>` and
-:ref:`select() <func-select>` call.
-
+:c:func:`read()`, :c:func:`write()` and
+:c:func:`select()` call.
.. c:type:: v4l2_sliced_vbi_format
@@ -191,7 +188,7 @@ struct v4l2_sliced_vbi_format
* - __u32
- ``io_size``
- :cspan:`2` Maximum number of bytes passed by one
- :ref:`read() <func-read>` or :ref:`write() <func-write>` call,
+ :c:func:`read()` or :c:func:`write()` call,
and the buffer size in bytes for the
:ref:`VIDIOC_QBUF` and
:ref:`VIDIOC_DQBUF <VIDIOC_QBUF>` ioctl. Drivers set this field
@@ -274,7 +271,6 @@ Sliced VBI services
\normalsize
-
Drivers may return an ``EINVAL`` error code when applications attempt to
read or write data without prior format negotiation, after switching the
video standard (which may invalidate the negotiated VBI parameters) and
@@ -284,13 +280,12 @@ return an ``EBUSY`` error code when applications attempt to change the
format while i/o is in progress (between a
:ref:`VIDIOC_STREAMON` and
:ref:`VIDIOC_STREAMOFF <VIDIOC_STREAMON>` call, and after the first
-:ref:`read() <func-read>` or :ref:`write() <func-write>` call).
-
+:c:func:`read()` or :c:func:`write()` call).
Reading and writing sliced VBI data
===================================
-A single :ref:`read() <func-read>` or :ref:`write() <func-write>`
+A single :c:func:`read()` or :c:func:`write()`
call must pass all data belonging to one video frame. That is an array
of struct :c:type:`v4l2_sliced_vbi_data` structures with one or
more elements and a total size not exceeding ``io_size`` bytes. Likewise
@@ -298,7 +293,6 @@ in streaming I/O mode one buffer of ``io_size`` bytes must contain data
of one video frame. The ``id`` of unused
struct :c:type:`v4l2_sliced_vbi_data` elements must be zero.
-
.. c:type:: v4l2_sliced_vbi_data
struct v4l2_sliced_vbi_data
@@ -344,9 +338,8 @@ struct v4l2_sliced_vbi_data
bytes at the end of this array are undefined, drivers and
applications shall ignore them.
-
Packets are always passed in ascending line number order, without
-duplicate line numbers. The :ref:`write() <func-write>` function and
+duplicate line numbers. The :c:func:`write()` function and
the :ref:`VIDIOC_QBUF` ioctl must return an ``EINVAL``
error code when applications violate this rule. They must also return an
EINVAL error code when applications pass an incorrect field or line
@@ -370,7 +363,6 @@ streaming (:ref:`memory mapping <mmap>` and/or
:ref:`user pointer <userp>`) I/O. The latter bears the possibility of
synchronizing video and VBI data by using buffer timestamps.
-
Sliced VBI Data in MPEG Streams
===============================
@@ -405,7 +397,6 @@ data insertion is not supported by the device.
The following subsections specify the format of the embedded sliced VBI
data.
-
MPEG Stream Embedded, Sliced VBI Data Format: NONE
--------------------------------------------------
@@ -417,7 +408,6 @@ nor driver shall insert "empty" embedded sliced VBI data packets in the
MPEG stream when this format is set. No MPEG stream data structures are
specified for this format.
-
MPEG Stream Embedded, Sliced VBI Data Format: IVTV
--------------------------------------------------
@@ -460,7 +450,6 @@ the end with unspecified fill bytes to align the end of the payload to a
with 18 lines/field with 43 bytes of data/line and a 4 byte magic
number).
-
.. c:type:: v4l2_mpeg_vbi_fmt_ivtv
struct v4l2_mpeg_vbi_fmt_ivtv
@@ -523,7 +512,6 @@ Magic Constants for struct v4l2_mpeg_vbi_fmt_ivtv magic field
valid and that 36 lines of sliced VBI data are present.
-
.. c:type:: v4l2_mpeg_vbi_itv0
.. c:type:: v4l2_mpeg_vbi_ITV0
@@ -548,7 +536,6 @@ structs v4l2_mpeg_vbi_itv0 and v4l2_mpeg_vbi_ITV0
value:
-
::
linemask[0] b0: line 6 first field
@@ -574,7 +561,6 @@ structs v4l2_mpeg_vbi_itv0 and v4l2_mpeg_vbi_ITV0
applications.
-
.. _v4l2-mpeg-vbi-itv0-1:
struct v4l2_mpeg_vbi_ITV0
@@ -596,7 +582,6 @@ struct v4l2_mpeg_vbi_ITV0
lines 6 through 23 of the second field.
-
.. c:type:: v4l2_mpeg_vbi_itv0_line
struct v4l2_mpeg_vbi_itv0_line
@@ -619,7 +604,6 @@ struct v4l2_mpeg_vbi_itv0_line
- The sliced VBI data for the line.
-
.. _ITV0-Line-Identifier-Constants:
Line Identifiers for struct v4l2_mpeg_vbi_itv0_line id field
@@ -653,7 +637,6 @@ Line Identifiers for struct v4l2_mpeg_vbi_itv0_line id field
description of the line payload.
-
.. [#f1]
According to :ref:`ETS 300 706 <ets300706>` lines 6-22 of the first
field and lines 5-22 of the second field may carry Teletext data.
diff --git a/Documentation/userspace-api/media/v4l/diff-v4l.rst b/Documentation/userspace-api/media/v4l/diff-v4l.rst
index 3f7bac44377c..caa05fbbd396 100644
--- a/Documentation/userspace-api/media/v4l/diff-v4l.rst
+++ b/Documentation/userspace-api/media/v4l/diff-v4l.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _diff-v4l:
@@ -13,7 +14,6 @@ the much improved V4L2 API replaces the V4L API. The support for the old
V4L calls were removed from Kernel, but the library :ref:`libv4l`
supports the conversion of a V4L API system call into a V4L2 one.
-
Opening and Closing Devices
===========================
@@ -32,7 +32,6 @@ recommend that V4L2 drivers by default register devices with the same
numbers, but the system administrator can assign arbitrary minor numbers
using driver module options. The major device number remains 81.
-
.. _v4l-dev:
.. flat-table:: V4L Device Types, Names and Numbers
@@ -53,14 +52,12 @@ using driver module options. The major device number remains 81.
- ``/dev/vbi``, ``/dev/vbi0`` to ``/dev/vbi31``
- 224-255
-
V4L prohibits (or used to prohibit) multiple opens of a device file.
V4L2 drivers *may* support multiple opens, see :ref:`open` for details
and consequences.
V4L drivers respond to V4L2 ioctls with an ``EINVAL`` error code.
-
Querying Capabilities
=====================
@@ -151,7 +148,6 @@ introduction.
- ``-``
- See above.
-
The ``audios`` field was replaced by ``capabilities`` flag
``V4L2_CAP_AUDIO``, indicating *if* the device has any audio inputs or
outputs. To determine their number applications can enumerate audio
@@ -164,7 +160,6 @@ were removed. Calling the :ref:`VIDIOC_S_FMT <VIDIOC_G_FMT>` or
dimensions returns the closest size possible, taking into account the
current video standard, cropping and scaling limitations.
-
Video Sources
=============
@@ -180,7 +175,6 @@ The ``channel`` field counting inputs was renamed to ``index``, the
video input types were renamed as follows:
-
.. flat-table::
:header-rows: 1
:stub-columns: 0
@@ -192,7 +186,6 @@ video input types were renamed as follows:
* - ``VIDEO_TYPE_CAMERA``
- ``V4L2_INPUT_TYPE_CAMERA``
-
Unlike the ``tuners`` field expressing the number of tuners of this
input, V4L2 assumes each video input is connected to at most one tuner.
However a tuner can have more than one input, i. e. RF connectors, and a
@@ -216,7 +209,6 @@ addition together with the ``norm`` field and has been removed in the
meantime. V4L2 has a similar, albeit more comprehensive approach to
video standards, see :ref:`standard` for more information.
-
Tuning
======
@@ -260,7 +252,6 @@ frequency where renamed to
to a struct :c:type:`v4l2_frequency` instead of an
unsigned long integer.
-
.. _v4l-image-properties:
Image Properties
@@ -274,7 +265,6 @@ replaced by V4L2 controls accessible with the
:ref:`VIDIOC_S_CTRL <VIDIOC_G_CTRL>` ioctls:
-
.. flat-table::
:header-rows: 1
:stub-columns: 0
@@ -292,7 +282,6 @@ replaced by V4L2 controls accessible with the
* - ``whiteness``
- ``V4L2_CID_WHITENESS``
-
The V4L picture controls are assumed to range from 0 to 65535 with no
particular reset value. The V4L2 API permits arbitrary limits and
defaults which can be queried with the
@@ -306,7 +295,6 @@ of the image depth and others need not know. The ``palette`` field moved
into the struct :c:type:`v4l2_pix_format`:
-
.. flat-table::
:header-rows: 1
:stub-columns: 0
@@ -346,11 +334,9 @@ into the struct :c:type:`v4l2_pix_format`:
* - ``VIDEO_PALETTE_YUV410P``
- :ref:`V4L2_PIX_FMT_YVU410 <V4L2-PIX-FMT-YVU410>`
-
V4L2 image formats are defined in :ref:`pixfmt`. The image format can
be selected with the :ref:`VIDIOC_S_FMT <VIDIOC_G_FMT>` ioctl.
-
Audio
=====
@@ -384,7 +370,6 @@ The following fields where replaced by V4L2 controls accessible with the
:ref:`VIDIOC_S_CTRL <VIDIOC_G_CTRL>` ioctls:
-
.. flat-table::
:header-rows: 1
:stub-columns: 0
@@ -400,7 +385,6 @@ The following fields where replaced by V4L2 controls accessible with the
* - ``balance``
- ``V4L2_CID_AUDIO_BALANCE``
-
To determine which of these controls are supported by a driver V4L
provides the ``flags`` ``VIDEO_AUDIO_VOLUME``, ``VIDEO_AUDIO_BASS``,
``VIDEO_AUDIO_TREBLE`` and ``VIDEO_AUDIO_BALANCE``. In the V4L2 API the
@@ -416,7 +400,6 @@ V4L2 API permits arbitrary limits and defaults which can be queried with
the :ref:`VIDIOC_QUERYCTRL` ioctl. For general
information about controls see :ref:`control`.
-
Frame Buffer Overlay
====================
@@ -463,7 +446,6 @@ size is determined by ``w.width`` and ``w.height``.
The ``VIDIOCCAPTURE`` ioctl to enable or disable overlay was renamed to
:ref:`VIDIOC_OVERLAY`.
-
Cropping
========
@@ -490,21 +472,19 @@ struct :c:type:`v4l2_window`. These structures are used to
select a capture or overlay format with the
:ref:`VIDIOC_S_FMT <VIDIOC_G_FMT>` ioctl.
-
Reading Images, Memory Mapping
==============================
-
Capturing using the read method
-------------------------------
There is no essential difference between reading images from a V4L or
-V4L2 device using the :ref:`read() <func-read>` function, however V4L2
+V4L2 device using the :c:func:`read()` function, however V4L2
drivers are not required to support this I/O method. Applications can
determine if the function is available with the
:ref:`VIDIOC_QUERYCAP` ioctl. All V4L2 devices
exchanging data with applications must support the
-:ref:`select() <func-select>` and :ref:`poll() <func-poll>`
+:c:func:`select()` and :c:func:`poll()`
functions.
To select an image format and size, V4L provides the ``VIDIOCSPICT`` and
@@ -517,7 +497,6 @@ negotiation ioctls :ref:`VIDIOC_G_FMT <VIDIOC_G_FMT>` and
For more information about the V4L2 read interface see :ref:`rw`.
-
Capturing using memory mapping
------------------------------
@@ -528,7 +507,6 @@ read method. V4L2 supports memory mapping as well, with a few
differences.
-
.. flat-table::
:header-rows: 1
:stub-columns: 0
@@ -550,7 +528,7 @@ differences.
``VIDIOCGMBUF`` ioctl is available to query the number of buffers,
the offset of each buffer from the start of the virtual file, and
the overall amount of memory used, which can be used as arguments
- for the :ref:`mmap() <func-mmap>` function.
+ for the :c:func:`mmap()` function.
- Buffers are individually mapped. The offset and size of each
buffer can be determined with the
:ref:`VIDIOC_QUERYBUF` ioctl.
@@ -568,7 +546,7 @@ differences.
the incoming queue. Filled buffers are dequeued from the outgoing
queue with the :ref:`VIDIOC_DQBUF <VIDIOC_QBUF>` ioctl. To wait
until filled buffers become available this function,
- :ref:`select() <func-select>` or :ref:`poll() <func-poll>` can
+ :c:func:`select()` or :c:func:`poll()` can
be used. The :ref:`VIDIOC_STREAMON` ioctl
must be called once after enqueuing one or more buffers to start
capturing. Its counterpart
@@ -577,11 +555,9 @@ differences.
signal status, if known, with the
:ref:`VIDIOC_ENUMINPUT` ioctl.
-
For a more in-depth discussion of memory mapping and examples, see
:ref:`mmap`.
-
Reading Raw VBI Data
====================
@@ -592,7 +568,6 @@ the V4L VBI interface. Reading from the device yields a raw VBI image
with the following parameters:
-
.. flat-table::
:header-rows: 1
:stub-columns: 0
@@ -616,7 +591,6 @@ with the following parameters:
* - flags
- 0
-
Undocumented in the V4L specification, in Linux 2.3 the
``VIDIOCGVBIFMT`` and ``VIDIOCSVBIFMT`` ioctls using struct
``vbi_format`` were added to determine the VBI image
@@ -630,11 +604,10 @@ remaining fields are probably equivalent to struct
Apparently only the Zoran (ZR 36120) driver implements these ioctls. The
semantics differ from those specified for V4L2 in two ways. The
-parameters are reset on :ref:`open() <func-open>` and
+parameters are reset on :c:func:`open()` and
``VIDIOCSVBIFMT`` always returns an ``EINVAL`` error code if the parameters
are invalid.
-
Miscellaneous
=============
diff --git a/Documentation/userspace-api/media/v4l/dmabuf.rst b/Documentation/userspace-api/media/v4l/dmabuf.rst
index f43d400dafaa..50fba11c2477 100644
--- a/Documentation/userspace-api/media/v4l/dmabuf.rst
+++ b/Documentation/userspace-api/media/v4l/dmabuf.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _dmabuf:
@@ -36,7 +37,6 @@ are passed in struct :c:type:`v4l2_buffer` (or in struct
driver must be switched into DMABUF I/O mode by calling the
:ref:`VIDIOC_REQBUFS <VIDIOC_REQBUFS>` with the desired buffer type.
-
Example: Initiating streaming I/O with DMABUF file descriptors
==============================================================
@@ -135,10 +135,10 @@ buffers it must wait until an empty buffer can be dequeued and reused.
Two methods exist to suspend execution of the application until one or
more buffers can be dequeued. By default :ref:`VIDIOC_DQBUF
<VIDIOC_QBUF>` blocks when no buffer is in the outgoing queue. When the
-``O_NONBLOCK`` flag was given to the :ref:`open() <func-open>` function,
+``O_NONBLOCK`` flag was given to the :c:func:`open()` function,
:ref:`VIDIOC_DQBUF <VIDIOC_QBUF>` returns immediately with an ``EAGAIN``
error code when no buffer is available. The
-:ref:`select() <func-select>` and :ref:`poll() <func-poll>`
+:c:func:`select()` and :c:func:`poll()`
functions are always available.
To start and stop capturing or displaying applications call the
@@ -158,5 +158,5 @@ Drivers implementing DMABUF importing I/O must support the
:ref:`VIDIOC_REQBUFS <VIDIOC_REQBUFS>`, :ref:`VIDIOC_QBUF <VIDIOC_QBUF>`,
:ref:`VIDIOC_DQBUF <VIDIOC_QBUF>`, :ref:`VIDIOC_STREAMON
<VIDIOC_STREAMON>` and :ref:`VIDIOC_STREAMOFF <VIDIOC_STREAMON>` ioctls,
-and the :ref:`select() <func-select>` and :ref:`poll() <func-poll>`
+and the :c:func:`select()` and :c:func:`poll()`
functions.
diff --git a/Documentation/userspace-api/media/v4l/format.rst b/Documentation/userspace-api/media/v4l/format.rst
index eaa6445f6160..35bbb2fea46e 100644
--- a/Documentation/userspace-api/media/v4l/format.rst
+++ b/Documentation/userspace-api/media/v4l/format.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _format:
@@ -6,7 +7,6 @@
Data Formats
************
-
Data Format Negotiation
=======================
@@ -53,8 +53,8 @@ image size.
When applications omit the :ref:`VIDIOC_S_FMT <VIDIOC_G_FMT>` ioctl its locking side
effects are implied by the next step, the selection of an I/O method
with the :ref:`VIDIOC_REQBUFS` ioctl or implicit
-with the first :ref:`read() <func-read>` or
-:ref:`write() <func-write>` call.
+with the first :c:func:`read()` or
+:c:func:`write()` call.
Generally only one logical stream can be assigned to a file descriptor,
the exception being drivers permitting simultaneous video capturing and
@@ -67,7 +67,6 @@ All drivers exchanging data with applications must support the
:ref:`VIDIOC_G_FMT <VIDIOC_G_FMT>` and :ref:`VIDIOC_S_FMT <VIDIOC_G_FMT>` ioctl. Implementation of the
:ref:`VIDIOC_TRY_FMT <VIDIOC_G_FMT>` is highly recommended but optional.
-
Image Format Enumeration
========================
diff --git a/Documentation/userspace-api/media/v4l/func-close.rst b/Documentation/userspace-api/media/v4l/func-close.rst
index c03ff3e62738..dba3263fd1b9 100644
--- a/Documentation/userspace-api/media/v4l/func-close.rst
+++ b/Documentation/userspace-api/media/v4l/func-close.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _func-close:
@@ -11,7 +12,6 @@ Name
v4l2-close - Close a V4L2 device
-
Synopsis
========
@@ -19,16 +19,13 @@ Synopsis
#include <unistd.h>
-
.. c:function:: int close( int fd )
- :name: v4l2-close
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
-
+ File descriptor returned by :c:func:`open()`.
Description
===========
@@ -38,7 +35,6 @@ associated with the file descriptor are freed. However data format
parameters, current input or output, control values or other properties
remain unchanged.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/func-ioctl.rst b/Documentation/userspace-api/media/v4l/func-ioctl.rst
index 8bde6b4f1cb5..f3b005094334 100644
--- a/Documentation/userspace-api/media/v4l/func-ioctl.rst
+++ b/Documentation/userspace-api/media/v4l/func-ioctl.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _func-ioctl:
@@ -11,7 +12,6 @@ Name
v4l2-ioctl - Program a V4L2 device
-
Synopsis
========
@@ -19,15 +19,13 @@ Synopsis
#include <sys/ioctl.h>
-
-.. c:function:: int ioctl( int fd, int request, void *argp )
- :name: v4l2-ioctl
+``int ioctl(int fd, int request, void *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``request``
V4L2 ioctl request code as defined in the ``videodev2.h`` header
@@ -36,7 +34,6 @@ Arguments
``argp``
Pointer to a function parameter, usually a structure.
-
Description
===========
@@ -50,7 +47,6 @@ include the version in the kernel sources on the system they compile on.
All V4L2 ioctl requests, their respective function and parameters are
specified in :ref:`user-func`.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/func-mmap.rst b/Documentation/userspace-api/media/v4l/func-mmap.rst
index b3a9cd862a7f..e3e5e64ebe7e 100644
--- a/Documentation/userspace-api/media/v4l/func-mmap.rst
+++ b/Documentation/userspace-api/media/v4l/func-mmap.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _func-mmap:
@@ -11,7 +12,6 @@ Name
v4l2-mmap - Map device memory into application address space
-
Synopsis
========
@@ -20,9 +20,7 @@ Synopsis
#include <unistd.h>
#include <sys/mman.h>
-
.. c:function:: void *mmap( void *start, size_t length, int prot, int flags, int fd, off_t offset )
- :name: v4l2-mmap
Arguments
=========
@@ -54,7 +52,7 @@ Arguments
#. The Linux ``videobuf`` kernel module, which is used by some
drivers supports only ``PROT_READ`` | ``PROT_WRITE``. When the
driver does not support the desired protection, the
- :ref:`mmap() <func-mmap>` function fails.
+ :c:func:`mmap()` function fails.
#. Device memory accesses (e. g. the memory on a graphics card
with video capturing hardware) may incur a performance penalty
@@ -70,7 +68,7 @@ Arguments
``MAP_FIXED`` requests that the driver selects no other address than
the one specified. If the specified address cannot be used,
- :ref:`mmap() <func-mmap>` will fail. If ``MAP_FIXED`` is specified,
+ :c:func:`mmap()` will fail. If ``MAP_FIXED`` is specified,
``start`` must be a multiple of the pagesize. Use of this option is
discouraged.
@@ -87,7 +85,7 @@ Arguments
flags.
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``offset``
Offset of the buffer in device memory. This must be the same value
@@ -97,11 +95,10 @@ Arguments
in the struct :c:type:`v4l2_plane` ``m`` union
``mem_offset`` field for the multi-planar API.
-
Description
===========
-The :ref:`mmap() <func-mmap>` function asks to map ``length`` bytes starting at
+The :c:func:`mmap()` function asks to map ``length`` bytes starting at
``offset`` in the memory of the device specified by ``fd`` into the
application address space, preferably at address ``start``. This latter
address is a hint only, and is usually specified as 0.
@@ -111,13 +108,12 @@ Suitable length and offset parameters are queried with the
allocated with the :ref:`VIDIOC_REQBUFS` ioctl
before they can be queried.
-To unmap buffers the :ref:`munmap() <func-munmap>` function is used.
-
+To unmap buffers the :c:func:`munmap()` function is used.
Return Value
============
-On success :ref:`mmap() <func-mmap>` returns a pointer to the mapped buffer. On
+On success :c:func:`mmap()` returns a pointer to the mapped buffer. On
error ``MAP_FAILED`` (-1) is returned, and the ``errno`` variable is set
appropriately. Possible error codes are:
diff --git a/Documentation/userspace-api/media/v4l/func-munmap.rst b/Documentation/userspace-api/media/v4l/func-munmap.rst
index e8a27e43373a..077d58333904 100644
--- a/Documentation/userspace-api/media/v4l/func-munmap.rst
+++ b/Documentation/userspace-api/media/v4l/func-munmap.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _func-munmap:
@@ -11,7 +12,6 @@ Name
v4l2-munmap - Unmap device memory
-
Synopsis
========
@@ -20,37 +20,33 @@ Synopsis
#include <unistd.h>
#include <sys/mman.h>
-
.. c:function:: int munmap( void *start, size_t length )
- :name: v4l2-munmap
Arguments
=========
``start``
Address of the mapped buffer as returned by the
- :ref:`mmap() <func-mmap>` function.
+ :c:func:`mmap()` function.
``length``
Length of the mapped buffer. This must be the same value as given to
- :ref:`mmap() <func-mmap>` and returned by the driver in the struct
+ :c:func:`mmap()` and returned by the driver in the struct
:c:type:`v4l2_buffer` ``length`` field for the
single-planar API and in the struct
:c:type:`v4l2_plane` ``length`` field for the
multi-planar API.
-
Description
===========
-Unmaps a previously with the :ref:`mmap() <func-mmap>` function mapped
+Unmaps a previously with the :c:func:`mmap()` function mapped
buffer and frees it, if possible.
-
Return Value
============
-On success :ref:`munmap() <func-munmap>` returns 0, on failure -1 and the
+On success :c:func:`munmap()` returns 0, on failure -1 and the
``errno`` variable is set appropriately:
EINVAL
diff --git a/Documentation/userspace-api/media/v4l/func-open.rst b/Documentation/userspace-api/media/v4l/func-open.rst
index f3890d284918..ba23ff1e45dd 100644
--- a/Documentation/userspace-api/media/v4l/func-open.rst
+++ b/Documentation/userspace-api/media/v4l/func-open.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _func-open:
@@ -11,7 +12,6 @@ Name
v4l2-open - Open a V4L2 device
-
Synopsis
========
@@ -19,9 +19,7 @@ Synopsis
#include <fcntl.h>
-
.. c:function:: int open( const char *device_name, int flags )
- :name: v4l2-open
Arguments
=========
@@ -34,7 +32,7 @@ Arguments
technicality, input devices still support only reading and output
devices only writing.
- When the ``O_NONBLOCK`` flag is given, the :ref:`read() <func-read>`
+ When the ``O_NONBLOCK`` flag is given, the :c:func:`read()`
function and the :ref:`VIDIOC_DQBUF <VIDIOC_QBUF>` ioctl will
return the ``EAGAIN`` error code when no data is available or no
buffer is in the driver outgoing queue, otherwise these functions
@@ -43,22 +41,20 @@ Arguments
Other flags have no effect.
-
Description
===========
-To open a V4L2 device applications call :ref:`open() <func-open>` with the
+To open a V4L2 device applications call :c:func:`open()` with the
desired device name. This function has no side effects; all data format
parameters, current input or output, control values or other properties
-remain unchanged. At the first :ref:`open() <func-open>` call after loading the
+remain unchanged. At the first :c:func:`open()` call after loading the
driver they will be reset to default values, drivers are never in an
undefined state.
-
Return Value
============
-On success :ref:`open() <func-open>` returns the new file descriptor. On error
+On success :c:func:`open()` returns the new file descriptor. On error
-1 is returned, and the ``errno`` variable is set appropriately.
Possible error codes are:
diff --git a/Documentation/userspace-api/media/v4l/func-poll.rst b/Documentation/userspace-api/media/v4l/func-poll.rst
index 95cf9c6fedcd..cbf4a0a10ae2 100644
--- a/Documentation/userspace-api/media/v4l/func-poll.rst
+++ b/Documentation/userspace-api/media/v4l/func-poll.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _func-poll:
@@ -11,7 +12,6 @@ Name
v4l2-poll - Wait for some event on a file descriptor
-
Synopsis
========
@@ -19,19 +19,16 @@ Synopsis
#include <sys/poll.h>
-
.. c:function:: int poll( struct pollfd *ufds, unsigned int nfds, int timeout )
- :name: v4l2-poll
Arguments
=========
-
Description
===========
-With the :ref:`poll() <func-poll>` function applications can suspend execution
+With the :c:func:`poll()` function applications can suspend execution
until the driver has captured data or is ready to accept data for
output.
@@ -44,57 +41,56 @@ display. When buffers are already in the outgoing queue of the driver
(capture) or the incoming queue isn't full (display) the function
returns immediately.
-On success :ref:`poll() <func-poll>` returns the number of file descriptors
+On success :c:func:`poll()` returns the number of file descriptors
that have been selected (that is, file descriptors for which the
-``revents`` field of the respective :c:func:`struct pollfd` structure
+``revents`` field of the respective ``struct pollfd`` structure
is non-zero). Capture devices set the ``POLLIN`` and ``POLLRDNORM``
flags in the ``revents`` field, output devices the ``POLLOUT`` and
``POLLWRNORM`` flags. When the function timed out it returns a value of
zero, on failure it returns -1 and the ``errno`` variable is set
appropriately. When the application did not call
-:ref:`VIDIOC_STREAMON <VIDIOC_STREAMON>` the :ref:`poll() <func-poll>`
+:ref:`VIDIOC_STREAMON <VIDIOC_STREAMON>` the :c:func:`poll()`
function succeeds, but sets the ``POLLERR`` flag in the ``revents``
field. When the application has called
:ref:`VIDIOC_STREAMON <VIDIOC_STREAMON>` for a capture device but
hasn't yet called :ref:`VIDIOC_QBUF <VIDIOC_QBUF>`, the
-:ref:`poll() <func-poll>` function succeeds and sets the ``POLLERR`` flag in
+:c:func:`poll()` function succeeds and sets the ``POLLERR`` flag in
the ``revents`` field. For output devices this same situation will cause
-:ref:`poll() <func-poll>` to succeed as well, but it sets the ``POLLOUT`` and
+:c:func:`poll()` to succeed as well, but it sets the ``POLLOUT`` and
``POLLWRNORM`` flags in the ``revents`` field.
If an event occurred (see :ref:`VIDIOC_DQEVENT`)
then ``POLLPRI`` will be set in the ``revents`` field and
-:ref:`poll() <func-poll>` will return.
+:c:func:`poll()` will return.
-When use of the :ref:`read() <func-read>` function has been negotiated and the
-driver does not capture yet, the :ref:`poll() <func-poll>` function starts
+When use of the :c:func:`read()` function has been negotiated and the
+driver does not capture yet, the :c:func:`poll()` function starts
capturing. When that fails it returns a ``POLLERR`` as above. Otherwise
it waits until data has been captured and can be read. When the driver
captures continuously (as opposed to, for example, still images) the
function may return immediately.
-When use of the :ref:`write() <func-write>` function has been negotiated and the
-driver does not stream yet, the :ref:`poll() <func-poll>` function starts
+When use of the :c:func:`write()` function has been negotiated and the
+driver does not stream yet, the :c:func:`poll()` function starts
streaming. When that fails it returns a ``POLLERR`` as above. Otherwise
it waits until the driver is ready for a non-blocking
-:ref:`write() <func-write>` call.
+:c:func:`write()` call.
If the caller is only interested in events (just ``POLLPRI`` is set in
-the ``events`` field), then :ref:`poll() <func-poll>` will *not* start
+the ``events`` field), then :c:func:`poll()` will *not* start
streaming if the driver does not stream yet. This makes it possible to
just poll for events and not for buffers.
-All drivers implementing the :ref:`read() <func-read>` or :ref:`write() <func-write>`
-function or streaming I/O must also support the :ref:`poll() <func-poll>`
+All drivers implementing the :c:func:`read()` or :c:func:`write()`
+function or streaming I/O must also support the :c:func:`poll()`
function.
-For more details see the :ref:`poll() <func-poll>` manual page.
-
+For more details see the :c:func:`poll()` manual page.
Return Value
============
-On success, :ref:`poll() <func-poll>` returns the number structures which have
+On success, :c:func:`poll()` returns the number structures which have
non-zero ``revents`` fields, or zero if the call timed out. On error -1
is returned, and the ``errno`` variable is set appropriately:
diff --git a/Documentation/userspace-api/media/v4l/func-read.rst b/Documentation/userspace-api/media/v4l/func-read.rst
index 56b255c595e1..e6f6ac4bed77 100644
--- a/Documentation/userspace-api/media/v4l/func-read.rst
+++ b/Documentation/userspace-api/media/v4l/func-read.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _func-read:
@@ -11,7 +12,6 @@ Name
v4l2-read - Read from a V4L2 device
-
Synopsis
========
@@ -19,15 +19,13 @@ Synopsis
#include <unistd.h>
-
.. c:function:: ssize_t read( int fd, void *buf, size_t count )
- :name: v4l2-read
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``buf``
Buffer to be filled
@@ -38,48 +36,48 @@ Arguments
Description
===========
-:ref:`read() <func-read>` attempts to read up to ``count`` bytes from file
+:c:func:`read()` attempts to read up to ``count`` bytes from file
descriptor ``fd`` into the buffer starting at ``buf``. The layout of the
data in the buffer is discussed in the respective device interface
-section, see ##. If ``count`` is zero, :ref:`read() <func-read>` returns zero
+section, see ##. If ``count`` is zero, :c:func:`read()` returns zero
and has no other results. If ``count`` is greater than ``SSIZE_MAX``,
the result is unspecified. Regardless of the ``count`` value each
-:ref:`read() <func-read>` call will provide at most one frame (two fields)
+:c:func:`read()` call will provide at most one frame (two fields)
worth of data.
-By default :ref:`read() <func-read>` blocks until data becomes available. When
-the ``O_NONBLOCK`` flag was given to the :ref:`open() <func-open>`
+By default :c:func:`read()` blocks until data becomes available. When
+the ``O_NONBLOCK`` flag was given to the :c:func:`open()`
function it returns immediately with an ``EAGAIN`` error code when no data
-is available. The :ref:`select() <func-select>` or
-:ref:`poll() <func-poll>` functions can always be used to suspend
+is available. The :c:func:`select()` or
+:c:func:`poll()` functions can always be used to suspend
execution until data becomes available. All drivers supporting the
-:ref:`read() <func-read>` function must also support :ref:`select() <func-select>` and
-:ref:`poll() <func-poll>`.
+:c:func:`read()` function must also support :c:func:`select()` and
+:c:func:`poll()`.
Drivers can implement read functionality in different ways, using a
single or multiple buffers and discarding the oldest or newest frames
once the internal buffers are filled.
-:ref:`read() <func-read>` never returns a "snapshot" of a buffer being filled.
+:c:func:`read()` never returns a "snapshot" of a buffer being filled.
Using a single buffer the driver will stop capturing when the
application starts reading the buffer until the read is finished. Thus
only the period of the vertical blanking interval is available for
reading, or the capture rate must fall below the nominal frame rate of
the video standard.
-The behavior of :ref:`read() <func-read>` when called during the active picture
+The behavior of :c:func:`read()` when called during the active picture
period or the vertical blanking separating the top and bottom field
depends on the discarding policy. A driver discarding the oldest frames
keeps capturing into an internal buffer, continuously overwriting the
previously, not read frame, and returns the frame being received at the
-time of the :ref:`read() <func-read>` call as soon as it is complete.
+time of the :c:func:`read()` call as soon as it is complete.
A driver discarding the newest frames stops capturing until the next
-:ref:`read() <func-read>` call. The frame being received at :ref:`read() <func-read>`
+:c:func:`read()` call. The frame being received at :c:func:`read()`
time is discarded, returning the following frame instead. Again this
implies a reduction of the capture rate to one half or less of the
nominal frame rate. An example of this model is the video read mode of
-the bttv driver, initiating a DMA to user memory when :ref:`read() <func-read>`
+the bttv driver, initiating a DMA to user memory when :c:func:`read()`
is called and returning when the DMA finished.
In the multiple buffer model drivers maintain a ring of internal
@@ -94,14 +92,13 @@ the driver with the :ref:`VIDIOC_G_PARM <VIDIOC_G_PARM>` and
however. The discarding policy is not reported and cannot be changed.
For minimum requirements see :ref:`devices`.
-
Return Value
============
On success, the number of bytes read is returned. It is not an error if
this number is smaller than the number of bytes requested, or the amount
of data required for one frame. This may happen for example because
-:ref:`read() <func-read>` was interrupted by a signal. On error, -1 is
+:c:func:`read()` was interrupted by a signal. On error, -1 is
returned, and the ``errno`` variable is set appropriately. In this case
the next read will start at the beginning of a new frame. Possible error
codes are:
@@ -129,5 +126,5 @@ EIO
communicate with a remote device (USB camera etc.).
EINVAL
- The :ref:`read() <func-read>` function is not supported by this driver, not
+ The :c:func:`read()` function is not supported by this driver, not
on this device, or generally not on this type of device.
diff --git a/Documentation/userspace-api/media/v4l/func-select.rst b/Documentation/userspace-api/media/v4l/func-select.rst
index 6715d5efcc27..ba1879c728f0 100644
--- a/Documentation/userspace-api/media/v4l/func-select.rst
+++ b/Documentation/userspace-api/media/v4l/func-select.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _func-select:
@@ -11,7 +12,6 @@ Name
v4l2-select - Synchronous I/O multiplexing
-
Synopsis
========
@@ -21,9 +21,7 @@ Synopsis
#include <sys/types.h>
#include <unistd.h>
-
.. c:function:: int select( int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout )
- :name: v4l2-select
Arguments
=========
@@ -43,11 +41,10 @@ Arguments
``timeout``
Maximum time to wait.
-
Description
===========
-With the :ref:`select() <func-select>` function applications can suspend
+With the :c:func:`select()` function applications can suspend
execution until the driver has captured data or is ready to accept data
for output.
@@ -56,40 +53,39 @@ buffer has been filled or displayed and can be dequeued with the
:ref:`VIDIOC_DQBUF <VIDIOC_QBUF>` ioctl. When buffers are already in
the outgoing queue of the driver the function returns immediately.
-On success :ref:`select() <func-select>` returns the total number of bits set in
-:c:func:`struct fd_set`. When the function timed out it returns
+On success :c:func:`select()` returns the total number of bits set in
+``fd_set``. When the function timed out it returns
a value of zero. On failure it returns -1 and the ``errno`` variable is
set appropriately. When the application did not call
:ref:`VIDIOC_QBUF` or
-:ref:`VIDIOC_STREAMON` yet the :ref:`select() <func-select>`
+:ref:`VIDIOC_STREAMON` yet the :c:func:`select()`
function succeeds, setting the bit of the file descriptor in ``readfds``
or ``writefds``, but subsequent :ref:`VIDIOC_DQBUF <VIDIOC_QBUF>`
calls will fail. [#f1]_
-When use of the :ref:`read() <func-read>` function has been negotiated and the
-driver does not capture yet, the :ref:`select() <func-select>` function starts
-capturing. When that fails, :ref:`select() <func-select>` returns successful and
-a subsequent :ref:`read() <func-read>` call, which also attempts to start
+When use of the :c:func:`read()` function has been negotiated and the
+driver does not capture yet, the :c:func:`select()` function starts
+capturing. When that fails, :c:func:`select()` returns successful and
+a subsequent :c:func:`read()` call, which also attempts to start
capturing, will return an appropriate error code. When the driver
captures continuously (as opposed to, for example, still images) and
-data is already available the :ref:`select() <func-select>` function returns
+data is already available the :c:func:`select()` function returns
immediately.
-When use of the :ref:`write() <func-write>` function has been negotiated the
-:ref:`select() <func-select>` function just waits until the driver is ready for a
-non-blocking :ref:`write() <func-write>` call.
+When use of the :c:func:`write()` function has been negotiated the
+:c:func:`select()` function just waits until the driver is ready for a
+non-blocking :c:func:`write()` call.
-All drivers implementing the :ref:`read() <func-read>` or :ref:`write() <func-write>`
-function or streaming I/O must also support the :ref:`select() <func-select>`
+All drivers implementing the :c:func:`read()` or :c:func:`write()`
+function or streaming I/O must also support the :c:func:`select()`
function.
-For more details see the :ref:`select() <func-select>` manual page.
-
+For more details see the :c:func:`select()` manual page.
Return Value
============
-On success, :ref:`select() <func-select>` returns the number of descriptors
+On success, :c:func:`select()` returns the number of descriptors
contained in the three returned descriptor sets, which will be zero if
the timeout expired. On error -1 is returned, and the ``errno`` variable
is set appropriately; the sets and ``timeout`` are undefined. Possible
@@ -115,6 +111,6 @@ EINVAL
``FD_SETSIZE``.
.. [#f1]
- The Linux kernel implements :ref:`select() <func-select>` like the
- :ref:`poll() <func-poll>` function, but :ref:`select() <func-select>` cannot
+ The Linux kernel implements :c:func:`select()` like the
+ :c:func:`poll()` function, but :c:func:`select()` cannot
return a ``POLLERR``.
diff --git a/Documentation/userspace-api/media/v4l/func-write.rst b/Documentation/userspace-api/media/v4l/func-write.rst
index 37683611df04..49f5a0f4275f 100644
--- a/Documentation/userspace-api/media/v4l/func-write.rst
+++ b/Documentation/userspace-api/media/v4l/func-write.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _func-write:
@@ -11,7 +12,6 @@ Name
v4l2-write - Write to a V4L2 device
-
Synopsis
========
@@ -19,15 +19,13 @@ Synopsis
#include <unistd.h>
-
.. c:function:: ssize_t write( int fd, void *buf, size_t count )
- :name: v4l2-write
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``buf``
Buffer with data to be written
@@ -38,10 +36,10 @@ Arguments
Description
===========
-:ref:`write() <func-write>` writes up to ``count`` bytes to the device
+:c:func:`write()` writes up to ``count`` bytes to the device
referenced by the file descriptor ``fd`` from the buffer starting at
``buf``. When the hardware outputs are not active yet, this function
-enables them. When ``count`` is zero, :ref:`write() <func-write>` returns 0
+enables them. When ``count`` is zero, :c:func:`write()` returns 0
without any other effect.
When the application does not provide more data in time, the previous
@@ -49,7 +47,6 @@ video frame, raw VBI image, sliced VPS or WSS data is displayed again.
Sliced Teletext or Closed Caption data is not repeated, the driver
inserts a blank line instead.
-
Return Value
============
@@ -80,5 +77,5 @@ EIO
I/O error. This indicates some hardware problem.
EINVAL
- The :ref:`write() <func-write>` function is not supported by this driver,
+ The :c:func:`write()` function is not supported by this driver,
not on this device, or generally not on this type of device.
diff --git a/Documentation/userspace-api/media/v4l/hist-v4l2.rst b/Documentation/userspace-api/media/v4l/hist-v4l2.rst
index 1a4fd941f163..28a2750d5c8c 100644
--- a/Documentation/userspace-api/media/v4l/hist-v4l2.rst
+++ b/Documentation/userspace-api/media/v4l/hist-v4l2.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _hist-v4l2:
@@ -14,18 +15,17 @@ not just an extension but a replacement for the V4L API. However it took
another four years and two stable kernel releases until the new API was
finally accepted for inclusion into the kernel in its present form.
-
Early Versions
==============
1998-08-20: First version.
-1998-08-27: The :ref:`select() <func-select>` function was introduced.
+1998-08-27: The :c:func:`select()` function was introduced.
1998-09-10: New video standard interface.
1998-09-18: The ``VIDIOC_NONCAP`` ioctl was replaced by the otherwise
-meaningless ``O_TRUNC`` :ref:`open() <func-open>` flag, and the
+meaningless ``O_TRUNC`` :c:func:`open()` flag, and the
aliases ``O_NONCAP`` and ``O_NOIO`` were defined. Applications can set
this flag if they intend to access controls only, as opposed to capture
applications which need exclusive access. The ``VIDEO_STD_XXX``
@@ -65,7 +65,6 @@ output devices were added.
1999-01-19: The ``VIDIOC_NEXTBUF`` ioctl was removed.
-
V4L2 Version 0.16 1999-01-31
============================
@@ -73,7 +72,6 @@ V4L2 Version 0.16 1999-01-31
are gone. VIDIOC_QBUF takes a v4l2_buffer as a parameter. Added
digital zoom (cropping) controls.
-
V4L2 Version 0.18 1999-03-16
============================
@@ -81,7 +79,6 @@ Added a v4l to V4L2 ioctl compatibility layer to videodev.c. Driver
writers, this changes how you implement your ioctl handler. See the
Driver Writer's Guide. Added some more control id codes.
-
V4L2 Version 0.19 1999-06-05
============================
@@ -107,7 +104,6 @@ malfunction of this ioctl.
1999-06-05: Changed the value of V4L2_CID_WHITENESS.
-
V4L2 Version 0.20 (1999-09-10)
==============================
@@ -128,14 +124,12 @@ common Linux driver API conventions.
VIDIOC_STREAMON, VIDIOC_STREAMOFF, VIDIOC_S_FREQ,
VIDIOC_S_INPUT, VIDIOC_S_OUTPUT, VIDIOC_S_EFFECT. For example
-
.. code-block:: c
err = ioctl (fd, VIDIOC_XXX, V4L2_XXX);
becomes
-
.. code-block:: c
int a = V4L2_XXX; err = ioctl(fd, VIDIOC_XXX, &a);
@@ -202,7 +196,6 @@ common Linux driver API conventions.
field counts captured frames, it is ignored by output devices. When a
capture driver drops a frame, the sequence number of that frame is skipped.
-
V4L2 Version 0.20 incremental changes
=====================================
@@ -290,13 +283,11 @@ A number of changes were made to the raw VBI interface.
were added. The former is an alias for the old ``V4L2_TYPE_VBI``, the
latter was missing in the ``videodev.h`` file.
-
V4L2 Version 0.20 2002-07-25
============================
Added sliced VBI interface proposal.
-
V4L2 in Linux 2.5.46, 2002-10
=============================
@@ -307,7 +298,7 @@ This unnamed version was finally merged into Linux 2.5.46.
1. As specified in :ref:`related`, drivers must make related device
functions available under all minor device numbers.
-2. The :ref:`open() <func-open>` function requires access mode
+2. The :c:func:`open()` function requires access mode
``O_RDWR`` regardless of the device type. All V4L2 drivers
exchanging data with applications must support the ``O_NONBLOCK``
flag. The ``O_NOIO`` flag, a V4L2 symbol which aliased the
@@ -435,7 +426,6 @@ This unnamed version was finally merged into Linux 2.5.46.
the buffer type names changed as follows.
-
.. flat-table::
:header-rows: 1
:stub-columns: 0
@@ -597,7 +587,6 @@ This unnamed version was finally merged into Linux 2.5.46.
V4L2 documentation was inaccurate, this has been corrected in
:ref:`pixfmt`.
-
V4L2 2003-06-19
===============
@@ -648,7 +637,6 @@ V4L2 2003-06-19
Kernel 2.6.39. Drivers and applications assuming a constant parameter
need an update.
-
V4L2 2003-11-05
===============
@@ -657,7 +645,6 @@ V4L2 2003-11-05
refer to bytes in memory, in ascending address order.
-
.. flat-table::
:header-rows: 1
:stub-columns: 0
@@ -678,7 +665,6 @@ V4L2 2003-11-05
- R, G, B, X
- B, G, R, X
-
The ``V4L2_PIX_FMT_BGR24`` example was always correct.
In :ref:`v4l-image-properties` the mapping of the V4L
@@ -689,7 +675,6 @@ V4L2 2003-11-05
RGB pixel formats differently. These issues have yet to be addressed,
for details see :ref:`pixfmt-rgb`.
-
V4L2 in Linux 2.6.6, 2004-05-09
===============================
@@ -698,7 +683,6 @@ V4L2 in Linux 2.6.6, 2004-05-09
ioctl, while the read-only version was renamed to
``VIDIOC_CROPCAP_OLD``. The old ioctl was removed on Kernel 2.6.39.
-
V4L2 in Linux 2.6.8
===================
@@ -709,7 +693,6 @@ V4L2 in Linux 2.6.8
the new ``V4L2_BUF_FLAG_INPUT`` flag. The ``flags`` field is no
longer read-only.
-
V4L2 spec erratum 2004-08-01
============================
@@ -727,7 +710,6 @@ V4L2 spec erratum 2004-08-01
also missing from examples. Also on the ``VIDIOC_DQBUF`` page the ``EIO``
error code was not documented.
-
V4L2 in Linux 2.6.14
====================
@@ -735,7 +717,6 @@ V4L2 in Linux 2.6.14
:ref:`sliced` and replaces the interface first proposed in V4L2
specification 0.8.
-
V4L2 in Linux 2.6.15
====================
@@ -755,7 +736,6 @@ V4L2 in Linux 2.6.15
``VIDIOC_G_MPEGCOMP`` and ``VIDIOC_S_MPEGCOMP`` ioctls where removed
in Linux 2.6.25.)
-
V4L2 spec erratum 2005-11-27
============================
@@ -765,7 +745,6 @@ cropping is supported. In the video standard selection example in
:ref:`standard` the :ref:`VIDIOC_S_STD <VIDIOC_G_STD>` call used
the wrong argument type.
-
V4L2 spec erratum 2006-01-10
============================
@@ -778,14 +757,12 @@ V4L2 spec erratum 2006-01-10
write-only as stated on its reference page. The ioctl changed in 2003
as noted above.
-
V4L2 spec erratum 2006-02-03
============================
1. In struct v4l2_captureparm and struct v4l2_outputparm the ``timeperframe``
field gives the time in seconds, not microseconds.
-
V4L2 spec erratum 2006-02-04
============================
@@ -808,7 +785,6 @@ V4L2 in Linux 2.6.17
``V4L2_TUNER_MODE_STEREO`` for this purpose is deprecated now. See
the :ref:`VIDIOC_G_TUNER <VIDIOC_G_TUNER>` section for details.
-
V4L2 spec erratum 2006-09-23 (Draft 0.15)
=========================================
@@ -837,7 +813,6 @@ V4L2 spec erratum 2006-09-23 (Draft 0.15)
extended from 224-239 to 224-255. Accordingly device file names
``/dev/vbi0`` to ``/dev/vbi31`` are possible now.
-
V4L2 in Linux 2.6.18
====================
@@ -852,7 +827,6 @@ V4L2 in Linux 2.6.18
``V4L2_CTRL_FLAG_INACTIVE`` and ``V4L2_CTRL_FLAG_SLIDER``
(:ref:`control-flags`). See :ref:`extended-controls` for details.
-
V4L2 in Linux 2.6.19
====================
@@ -874,14 +848,12 @@ V4L2 in Linux 2.6.19
3. A new pixel format ``V4L2_PIX_FMT_RGB444`` (:ref:`pixfmt-rgb`) was
added.
-
V4L2 spec erratum 2006-10-12 (Draft 0.17)
=========================================
1. ``V4L2_PIX_FMT_HM12`` (:ref:`reserved-formats`) is a YUV 4:2:0, not
4:2:2 format.
-
V4L2 in Linux 2.6.21
====================
@@ -889,7 +861,6 @@ V4L2 in Linux 2.6.21
General Public License version two or later, and under a 3-clause
BSD-style license.
-
V4L2 in Linux 2.6.22
====================
@@ -914,7 +885,6 @@ V4L2 in Linux 2.6.22
This may **break compatibility** with existing applications. Drivers
supporting the "host order RGB32" format are not known.
-
V4L2 in Linux 2.6.24
====================
@@ -922,7 +892,6 @@ V4L2 in Linux 2.6.24
``V4L2_PIX_FMT_YUV555``, ``V4L2_PIX_FMT_YUV565`` and
``V4L2_PIX_FMT_YUV32`` were added.
-
V4L2 in Linux 2.6.25
====================
@@ -949,7 +918,6 @@ V4L2 in Linux 2.6.25
interface in Linux 2.6.18, where finally removed from the
``videodev2.h`` header file.
-
V4L2 in Linux 2.6.26
====================
@@ -959,7 +927,6 @@ V4L2 in Linux 2.6.26
2. Added user controls ``V4L2_CID_CHROMA_AGC`` and
``V4L2_CID_COLOR_KILLER``.
-
V4L2 in Linux 2.6.27
====================
@@ -971,7 +938,6 @@ V4L2 in Linux 2.6.27
``V4L2_PIX_FMT_PCA561``, ``V4L2_PIX_FMT_SGBRG8``,
``V4L2_PIX_FMT_PAC207`` and ``V4L2_PIX_FMT_PJPG`` were added.
-
V4L2 in Linux 2.6.28
====================
@@ -983,7 +949,6 @@ V4L2 in Linux 2.6.28
3. The pixel formats ``V4L2_PIX_FMT_SGRBG10`` and
``V4L2_PIX_FMT_SGRBG10DPCM8`` were added.
-
V4L2 in Linux 2.6.29
====================
@@ -999,7 +964,6 @@ V4L2 in Linux 2.6.29
``V4L2_CID_ZOOM_RELATIVE``, ``V4L2_CID_ZOOM_CONTINUOUS`` and
``V4L2_CID_PRIVACY``.
-
V4L2 in Linux 2.6.30
====================
@@ -1007,7 +971,6 @@ V4L2 in Linux 2.6.30
2. New control ``V4L2_CID_COLORFX`` was added.
-
V4L2 in Linux 2.6.32
====================
@@ -1034,21 +997,18 @@ V4L2 in Linux 2.6.32
9. Added Remote Controller chapter, describing the default Remote
Controller mapping for media devices.
-
V4L2 in Linux 2.6.33
====================
1. Added support for Digital Video timings in order to support HDTV
receivers and transmitters.
-
V4L2 in Linux 2.6.34
====================
1. Added ``V4L2_CID_IRIS_ABSOLUTE`` and ``V4L2_CID_IRIS_RELATIVE``
controls to the :ref:`Camera controls class <camera-controls>`.
-
V4L2 in Linux 2.6.37
====================
@@ -1057,7 +1017,6 @@ V4L2 in Linux 2.6.37
applications found that used it. It was originally scheduled for
removal in 2.6.35.
-
V4L2 in Linux 2.6.39
====================
@@ -1067,7 +1026,6 @@ V4L2 in Linux 2.6.39
drivers and applications. See :ref:`multi-planar API <planar-apis>`
for details.
-
V4L2 in Linux 3.1
=================
@@ -1078,7 +1036,6 @@ V4L2 in Linux 3.1
Added V4L2_CTRL_TYPE_BITMASK.
-
V4L2 in Linux 3.2
=================
@@ -1089,7 +1046,6 @@ V4L2 in Linux 3.2
Does not affect the compatibility of current drivers and
applications. See :ref:`selection API <selection-api>` for details.
-
V4L2 in Linux 3.3
=================
@@ -1099,7 +1055,6 @@ V4L2 in Linux 3.3
2. Added the device_caps field to struct v4l2_capabilities and added
the new V4L2_CAP_DEVICE_CAPS capability.
-
V4L2 in Linux 3.4
=================
@@ -1110,7 +1065,6 @@ V4L2 in Linux 3.4
:ref:`VIDIOC_QUERY_DV_TIMINGS` and
:ref:`VIDIOC_DV_TIMINGS_CAP`.
-
V4L2 in Linux 3.5
=================
@@ -1137,7 +1091,6 @@ V4L2 in Linux 3.5
``V4L2_CID_AUTO_FOCUS_START``, ``V4L2_CID_AUTO_FOCUS_STOP``,
``V4L2_CID_AUTO_FOCUS_STATUS`` and ``V4L2_CID_AUTO_FOCUS_RANGE``.
-
V4L2 in Linux 3.6
=================
@@ -1150,7 +1103,6 @@ V4L2 in Linux 3.6
3. Added support for frequency band enumerations:
:ref:`VIDIOC_ENUM_FREQ_BANDS`.
-
V4L2 in Linux 3.9
=================
@@ -1160,7 +1112,6 @@ V4L2 in Linux 3.9
2. Added ``V4L2_EVENT_CTRL_CH_RANGE`` control event changes flag. See
:ref:`ctrl-changes-flags`.
-
V4L2 in Linux 3.10
==================
@@ -1172,32 +1123,27 @@ V4L2 in Linux 3.10
2. Added new debugging ioctl
:ref:`VIDIOC_DBG_G_CHIP_INFO`.
-
V4L2 in Linux 3.11
==================
1. Remove obsolete ``VIDIOC_DBG_G_CHIP_IDENT`` ioctl.
-
V4L2 in Linux 3.14
==================
1. In struct v4l2_rect, the type of ``width`` and
``height`` fields changed from _s32 to _u32.
-
V4L2 in Linux 3.15
==================
1. Added Software Defined Radio (SDR) Interface.
-
V4L2 in Linux 3.16
==================
1. Added event V4L2_EVENT_SOURCE_CHANGE.
-
V4L2 in Linux 3.17
==================
@@ -1207,14 +1153,12 @@ V4L2 in Linux 3.17
2. Added compound control types and
:ref:`VIDIOC_QUERY_EXT_CTRL <VIDIOC_QUERYCTRL>`.
-
V4L2 in Linux 3.18
==================
1. Added ``V4L2_CID_PAN_SPEED`` and ``V4L2_CID_TILT_SPEED`` camera
controls.
-
V4L2 in Linux 3.19
==================
@@ -1232,13 +1176,11 @@ V4L2 in Linux 4.4
3. Added transmitter support for Software Defined Radio (SDR) Interface.
-
.. _other:
Relation of V4L2 to other Linux multimedia APIs
===============================================
-
.. _xvideo:
X Video Extension
@@ -1284,7 +1226,6 @@ YUV to RGB conversion and scaling for faster video playback, and added
an interface to MPEG-2 decoding hardware. This API is useful to display
images captured with V4L2 devices.
-
Digital Video
-------------
@@ -1294,13 +1235,11 @@ homepage at `https://linuxtv.org <https://linuxtv.org>`__. The Linux
DVB API has no connection to the V4L2 API except that drivers for hybrid
hardware may support both.
-
Audio Interfaces
----------------
[to do - OSS/ALSA]
-
.. _experimental:
Experimental API Elements
@@ -1314,7 +1253,6 @@ change in the future.
- :ref:`VIDIOC_DBG_G_CHIP_INFO` ioctl.
-
.. _obsolete:
Obsolete API Elements
diff --git a/Documentation/userspace-api/media/v4l/io.rst b/Documentation/userspace-api/media/v4l/io.rst
index 9dc36b41dbf6..ce0cece6f35f 100644
--- a/Documentation/userspace-api/media/v4l/io.rst
+++ b/Documentation/userspace-api/media/v4l/io.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _io:
@@ -9,8 +10,8 @@ The V4L2 API defines several different methods to read from or write to
a device. All drivers exchanging data with applications must support at
least one of them.
-The classic I/O method using the :ref:`read() <func-read>` and
-:ref:`write() <func-write>` function is automatically selected after opening a
+The classic I/O method using the :c:func:`read()` and
+:c:func:`write()` function is automatically selected after opening a
V4L2 device. When the driver does not support this method attempts to
read or write will fail at any time.
@@ -38,7 +39,6 @@ closing and reopening the device.
The following sections describe the various I/O methods in more detail.
-
.. toctree::
:maxdepth: 1
diff --git a/Documentation/userspace-api/media/v4l/libv4l-introduction.rst b/Documentation/userspace-api/media/v4l/libv4l-introduction.rst
index e03280b35570..05690f2358ce 100644
--- a/Documentation/userspace-api/media/v4l/libv4l-introduction.rst
+++ b/Documentation/userspace-api/media/v4l/libv4l-introduction.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _libv4l-introduction:
@@ -17,7 +18,6 @@ An example of using libv4l is provided by
libv4l consists of 3 different libraries:
-
libv4lconvert
=============
@@ -65,7 +65,6 @@ libv4lconvert/processing. These controls are stored application wide
libv4lconvert/processing offers the actual video processing
functionality.
-
libv4l1
=======
@@ -78,7 +77,6 @@ just pass calls through.
Since those functions are emulations of the old V4L1 API, it shouldn't
be used for new applications.
-
libv4l2
=======
@@ -105,7 +103,6 @@ available in the driver. :ref:`VIDIOC_ENUM_FMT <VIDIOC_ENUM_FMT>`
keeps enumerating the hardware supported formats, plus the emulated
formats offered by libv4l at the end.
-
.. _libv4l-ops:
Libv4l device control functions
@@ -115,17 +112,17 @@ The common file operation methods are provided by libv4l.
Those functions operate just like the gcc function ``dup()`` and
V4L2 functions
-:c:func:`open() <v4l2-open>`, :c:func:`close() <v4l2-close>`,
-:c:func:`ioctl() <v4l2-ioctl>`, :c:func:`read() <v4l2-read>`,
-:c:func:`mmap() <v4l2-mmap>` and :c:func:`munmap() <v4l2-munmap>`:
+:c:func:`open()`, :c:func:`close()`,
+:c:func:`ioctl()`, :c:func:`read()`,
+:c:func:`mmap()` and :c:func:`munmap()`:
.. c:function:: int v4l2_open(const char *file, int oflag, ...)
- operates like the :c:func:`open() <v4l2-open>` function.
+ operates like the :c:func:`open()` function.
.. c:function:: int v4l2_close(int fd)
- operates like the :c:func:`close() <v4l2-close>` function.
+ operates like the :c:func:`close()` function.
.. c:function:: int v4l2_dup(int fd)
@@ -133,19 +130,19 @@ V4L2 functions
.. c:function:: int v4l2_ioctl (int fd, unsigned long int request, ...)
- operates like the :c:func:`ioctl() <v4l2-ioctl>` function.
+ operates like the :c:func:`ioctl()` function.
.. c:function:: int v4l2_read (int fd, void* buffer, size_t n)
- operates like the :c:func:`read() <v4l2-read>` function.
+ operates like the :c:func:`read()` function.
.. c:function:: void v4l2_mmap(void *start, size_t length, int prot, int flags, int fd, int64_t offset);
- operates like the :c:func:`munmap() <v4l2-munmap>` function.
+ operates like the :c:func:`munmap()` function.
.. c:function:: int v4l2_munmap(void *_start, size_t length);
- operates like the :c:func:`munmap() <v4l2-munmap>` function.
+ operates like the :c:func:`munmap()` function.
Those functions provide additional control:
@@ -168,14 +165,13 @@ Those functions provide additional control:
of the given v4l control id. when the cid does not exist, could not be
accessed for some reason, or some error occurred 0 is returned.
-
v4l1compat.so wrapper library
=============================
This library intercepts calls to
-:c:func:`open() <v4l2-open>`, :c:func:`close() <v4l2-close>`,
-:c:func:`ioctl() <v4l2-ioctl>`, :c:func:`mmap() <v4l2-mmap>` and
-:c:func:`munmap() <v4l2-munmap>`
+:c:func:`open()`, :c:func:`close()`,
+:c:func:`ioctl()`, :c:func:`mmap()` and
+:c:func:`munmap()`
operations and redirects them to the libv4l counterparts, by using
``LD_PRELOAD=/usr/lib/v4l1compat.so``. It also emulates V4L1 calls via V4L2
API.
diff --git a/Documentation/userspace-api/media/v4l/mmap.rst b/Documentation/userspace-api/media/v4l/mmap.rst
index 1cce31c6de79..16b1e13b029f 100644
--- a/Documentation/userspace-api/media/v4l/mmap.rst
+++ b/Documentation/userspace-api/media/v4l/mmap.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _mmap:
@@ -35,22 +36,22 @@ This ioctl can also be used to change the number of buffers or to free
the allocated memory, provided none of the buffers are still mapped.
Before applications can access the buffers they must map them into their
-address space with the :ref:`mmap() <func-mmap>` function. The
+address space with the :c:func:`mmap()` function. The
location of the buffers in device memory can be determined with the
:ref:`VIDIOC_QUERYBUF` ioctl. In the single-planar
API case, the ``m.offset`` and ``length`` returned in a struct
:c:type:`v4l2_buffer` are passed as sixth and second
-parameter to the :ref:`mmap() <func-mmap>` function. When using the
+parameter to the :c:func:`mmap()` function. When using the
multi-planar API, struct :c:type:`v4l2_buffer` contains an
array of struct :c:type:`v4l2_plane` structures, each
containing its own ``m.offset`` and ``length``. When using the
multi-planar API, every plane of every buffer has to be mapped
-separately, so the number of calls to :ref:`mmap() <func-mmap>` should
+separately, so the number of calls to :c:func:`mmap()` should
be equal to number of buffers times number of planes in each buffer. The
offset and length values must not be modified. Remember, the buffers are
allocated in physical memory, as opposed to virtual memory, which can be
swapped out to disk. Applications should free the buffers as soon as
-possible with the :ref:`munmap() <func-munmap>` function.
+possible with the :c:func:`munmap()` function.
Example: Mapping buffers in the single-planar API
=================================================
@@ -122,7 +123,6 @@ Example: Mapping buffers in the single-planar API
for (i = 0; i < reqbuf.count; i++)
munmap(buffers[i].start, buffers[i].length);
-
Example: Mapping buffers in the multi-planar API
================================================
@@ -238,10 +238,10 @@ be determined at any time using the :ref:`VIDIOC_QUERYBUF` ioctl. Two
methods exist to suspend execution of the application until one or more
buffers can be dequeued. By default :ref:`VIDIOC_DQBUF <VIDIOC_QBUF>`
blocks when no buffer is in the outgoing queue. When the ``O_NONBLOCK``
-flag was given to the :ref:`open() <func-open>` function,
+flag was given to the :c:func:`open()` function,
:ref:`VIDIOC_DQBUF <VIDIOC_QBUF>` returns immediately with an ``EAGAIN``
-error code when no buffer is available. The :ref:`select() <func-select>`
-or :ref:`poll() <func-poll>` functions are always available.
+error code when no buffer is available. The :c:func:`select()`
+or :c:func:`poll()` functions are always available.
To start and stop capturing or output applications call the
:ref:`VIDIOC_STREAMON <VIDIOC_STREAMON>` and :ref:`VIDIOC_STREAMOFF
@@ -259,15 +259,15 @@ Drivers implementing memory mapping I/O must support the
<VIDIOC_QUERYBUF>`, :ref:`VIDIOC_QBUF <VIDIOC_QBUF>`, :ref:`VIDIOC_DQBUF
<VIDIOC_QBUF>`, :ref:`VIDIOC_STREAMON <VIDIOC_STREAMON>`
and :ref:`VIDIOC_STREAMOFF <VIDIOC_STREAMON>` ioctls, the :ref:`mmap()
-<func-mmap>`, :ref:`munmap() <func-munmap>`, :ref:`select()
-<func-select>` and :ref:`poll() <func-poll>` function. [#f3]_
+<func-mmap>`, :c:func:`munmap()`, :ref:`select()
+<func-select>` and :c:func:`poll()` function. [#f3]_
[capture example]
.. [#f1]
One could use one file descriptor and set the buffer type field
accordingly when calling :ref:`VIDIOC_QBUF` etc.,
- but it makes the :ref:`select() <func-select>` function ambiguous. We also
+ but it makes the :c:func:`select()` function ambiguous. We also
like the clean approach of one file descriptor per logical stream.
Video overlay for example is also a logical stream, although the CPU
is not needed for continuous operation.
@@ -280,6 +280,6 @@ and :ref:`VIDIOC_STREAMOFF <VIDIOC_STREAMON>` ioctls, the :ref:`mmap()
scatter-gather lists and the like.
.. [#f3]
- At the driver level :ref:`select() <func-select>` and :ref:`poll() <func-poll>` are
- the same, and :ref:`select() <func-select>` is too important to be optional.
+ At the driver level :c:func:`select()` and :c:func:`poll()` are
+ the same, and :c:func:`select()` is too important to be optional.
The rest should be evident.
diff --git a/Documentation/userspace-api/media/v4l/open.rst b/Documentation/userspace-api/media/v4l/open.rst
index 4e8fd216a1b0..18bfb9b8137d 100644
--- a/Documentation/userspace-api/media/v4l/open.rst
+++ b/Documentation/userspace-api/media/v4l/open.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _open:
@@ -140,7 +141,6 @@ means applications cannot *reliably* scan for loaded or installed
drivers. The user must enter a device name, or the application can try
the conventional device names.
-
.. _related:
Related Devices
@@ -157,7 +157,7 @@ support all functions. However, in practice this never worked: this
support it and if they did it was certainly never tested. In addition,
switching a device node between different functions only works when
using the streaming I/O API, not with the
-:ref:`read() <func-read>`/\ :ref:`write() <func-write>` API.
+:c:func:`read()`/\ :c:func:`write()` API.
Today each V4L2 device node supports just one function.
@@ -178,7 +178,6 @@ the Media Controller. If you want to work on this please write to the
linux-media mailing list:
`https://linuxtv.org/lists.php <https://linuxtv.org/lists.php>`__.
-
Multiple Opens
==============
@@ -192,8 +191,8 @@ device should not change the state of the device. [#f2]_
Once an application has allocated the memory buffers needed for
streaming data (by calling the :ref:`VIDIOC_REQBUFS`
or :ref:`VIDIOC_CREATE_BUFS` ioctls, or
-implicitly by calling the :ref:`read() <func-read>` or
-:ref:`write() <func-write>` functions) that application (filehandle)
+implicitly by calling the :c:func:`read()` or
+:c:func:`write()` functions) that application (filehandle)
becomes the owner of the device. It is no longer allowed to make changes
that would affect the buffer sizes (e.g. by calling the
:ref:`VIDIOC_S_FMT <VIDIOC_G_FMT>` ioctl) and other applications are
@@ -206,7 +205,6 @@ requested type of data, and to change related properties, to this file
descriptor. Applications can request additional access privileges using
the priority mechanism described in :ref:`app-pri`.
-
Shared Data Streams
===================
@@ -215,12 +213,11 @@ the same data stream on a device by copying buffers, time multiplexing
or similar means. This is better handled by a proxy application in user
space.
-
Functions
=========
To open and close V4L2 devices applications use the
-:ref:`open() <func-open>` and :ref:`close() <func-close>` function,
+:c:func:`open()` and :c:func:`close()` function,
respectively. Devices are programmed using the
:ref:`ioctl() <func-ioctl>` function as explained in the following
sections.
@@ -228,7 +225,7 @@ sections.
.. [#f1]
There are still some old and obscure drivers that have not been
updated to allow for multiple opens. This implies that for such
- drivers :ref:`open() <func-open>` can return an ``EBUSY`` error code
+ drivers :c:func:`open()` can return an ``EBUSY`` error code
when the device is already in use.
.. [#f2]
diff --git a/Documentation/userspace-api/media/v4l/rw.rst b/Documentation/userspace-api/media/v4l/rw.rst
index 43609a27c3ec..64b85fb2a328 100644
--- a/Documentation/userspace-api/media/v4l/rw.rst
+++ b/Documentation/userspace-api/media/v4l/rw.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _rw:
@@ -6,8 +7,8 @@
Read/Write
**********
-Input and output devices support the :ref:`read() <func-read>` and
-:ref:`write() <func-write>` function, respectively, when the
+Input and output devices support the :c:func:`read()` and
+:c:func:`write()` function, respectively, when the
``V4L2_CAP_READWRITE`` flag in the ``capabilities`` field of struct
:c:type:`v4l2_capability` returned by the
:ref:`VIDIOC_QUERYCAP` ioctl is set.
@@ -22,18 +23,17 @@ However this is also the simplest I/O method, requiring little or no
setup to exchange data. It permits command line stunts like this (the
vidctrl tool is fictitious):
-
.. code-block:: none
$ vidctrl /dev/video --input=0 --format=YUYV --size=352x288
$ dd if=/dev/video of=myimage.422 bs=202752 count=1
-To read from the device applications use the :ref:`read() <func-read>`
-function, to write the :ref:`write() <func-write>` function. Drivers
+To read from the device applications use the :c:func:`read()`
+function, to write the :c:func:`write()` function. Drivers
must implement one I/O method if they exchange data with applications,
but it need not be this. [#f1]_ When reading or writing is supported, the
-driver must also support the :ref:`select() <func-select>` and
-:ref:`poll() <func-poll>` function. [#f2]_
+driver must also support the :c:func:`select()` and
+:c:func:`poll()` function. [#f2]_
.. [#f1]
It would be desirable if applications could depend on drivers
@@ -43,5 +43,5 @@ driver must also support the :ref:`select() <func-select>` and
capturing still images.
.. [#f2]
- At the driver level :ref:`select() <func-select>` and :ref:`poll() <func-poll>` are
- the same, and :ref:`select() <func-select>` is too important to be optional.
+ At the driver level :c:func:`select()` and :c:func:`poll()` are
+ the same, and :c:func:`select()` is too important to be optional.
diff --git a/Documentation/userspace-api/media/v4l/streaming-par.rst b/Documentation/userspace-api/media/v4l/streaming-par.rst
index cc2e8fcecc7e..806cbfdad0f1 100644
--- a/Documentation/userspace-api/media/v4l/streaming-par.rst
+++ b/Documentation/userspace-api/media/v4l/streaming-par.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _streaming-par:
@@ -14,13 +15,13 @@ The current video standard determines a nominal number of frames per
second. If less than this number of frames is to be captured or output,
applications can request frame skipping or duplicating on the driver
side. This is especially useful when using the
-:ref:`read() <func-read>` or :ref:`write() <func-write>`, which are
+:c:func:`read()` or :c:func:`write()`, which are
not augmented by timestamps or sequence counters, and to avoid
unnecessary data copying.
Finally these ioctls can be used to determine the number of buffers used
internally by a driver in read/write mode. For implications see the
-section discussing the :ref:`read() <func-read>` function.
+section discussing the :c:func:`read()` function.
To get and set the streaming parameters applications call the
:ref:`VIDIOC_G_PARM <VIDIOC_G_PARM>` and
diff --git a/Documentation/userspace-api/media/v4l/userp.rst b/Documentation/userspace-api/media/v4l/userp.rst
index 5b7321907dab..db224f9b611e 100644
--- a/Documentation/userspace-api/media/v4l/userp.rst
+++ b/Documentation/userspace-api/media/v4l/userp.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _userp:
@@ -78,10 +79,10 @@ buffers it must wait until an empty buffer can be dequeued and reused.
Two methods exist to suspend execution of the application until one or
more buffers can be dequeued. By default :ref:`VIDIOC_DQBUF
<VIDIOC_QBUF>` blocks when no buffer is in the outgoing queue. When the
-``O_NONBLOCK`` flag was given to the :ref:`open() <func-open>` function,
+``O_NONBLOCK`` flag was given to the :c:func:`open()` function,
:ref:`VIDIOC_DQBUF <VIDIOC_QBUF>` returns immediately with an ``EAGAIN``
error code when no buffer is available. The :ref:`select()
-<func-select>` or :ref:`poll() <func-poll>` function are always
+<func-select>` or :c:func:`poll()` function are always
available.
To start and stop capturing or output applications call the
@@ -101,7 +102,7 @@ Drivers implementing user pointer I/O must support the
:ref:`VIDIOC_REQBUFS <VIDIOC_REQBUFS>`, :ref:`VIDIOC_QBUF <VIDIOC_QBUF>`,
:ref:`VIDIOC_DQBUF <VIDIOC_QBUF>`, :ref:`VIDIOC_STREAMON <VIDIOC_STREAMON>`
and :ref:`VIDIOC_STREAMOFF <VIDIOC_STREAMON>` ioctls, the
-:ref:`select() <func-select>` and :ref:`poll() <func-poll>` function. [#f2]_
+:c:func:`select()` and :c:func:`poll()` function. [#f2]_
.. [#f1]
We expect that frequently used buffers are typically not swapped out.
@@ -116,6 +117,6 @@ and :ref:`VIDIOC_STREAMOFF <VIDIOC_STREAMON>` ioctls, the
because an application may share them with other processes.
.. [#f2]
- At the driver level :ref:`select() <func-select>` and :ref:`poll() <func-poll>` are
- the same, and :ref:`select() <func-select>` is too important to be optional.
+ At the driver level :c:func:`select()` and :c:func:`poll()` are
+ the same, and :c:func:`select()` is too important to be optional.
The rest should be evident.
diff --git a/Documentation/userspace-api/media/v4l/vidioc-create-bufs.rst b/Documentation/userspace-api/media/v4l/vidioc-create-bufs.rst
index d999028f47df..b06e5b528e11 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-create-bufs.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-create-bufs.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_CREATE_BUFS:
@@ -11,24 +12,22 @@ Name
VIDIOC_CREATE_BUFS - Create buffers for Memory Mapped or User Pointer or DMA Buffer I/O
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_CREATE_BUFS, struct v4l2_create_buffers *argp )
- :name: VIDIOC_CREATE_BUFS
+.. c:macro:: VIDIOC_CREATE_BUFS
+``int ioctl(int fd, VIDIOC_CREATE_BUFS, struct v4l2_create_buffers *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_create_buffers`.
-
Description
===========
@@ -71,7 +70,6 @@ the actual number allocated and the starting index in the ``count`` and
the ``index`` fields respectively. On return ``count`` can be smaller
than the number requested.
-
.. c:type:: v4l2_create_buffers
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
@@ -119,7 +117,6 @@ than the number requested.
- A place holder for future extensions. Drivers and applications
must set the array to zero.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-cropcap.rst b/Documentation/userspace-api/media/v4l/vidioc-cropcap.rst
index aa02c312824e..00c31410d4e4 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-cropcap.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-cropcap.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_CROPCAP:
@@ -11,24 +12,22 @@ Name
VIDIOC_CROPCAP - Information about the video cropping and scaling abilities
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_CROPCAP, struct v4l2_cropcap *argp )
- :name: VIDIOC_CROPCAP
+.. c:macro:: VIDIOC_CROPCAP
+``int ioctl(int fd, VIDIOC_CROPCAP, struct v4l2_cropcap *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_cropcap`.
-
Description
===========
@@ -95,7 +94,6 @@ overlay devices.
Starting with kernel 4.13 both variations are allowed.
-
.. _v4l2-rect-crop:
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
@@ -120,7 +118,6 @@ overlay devices.
- ``height``
- Height of the rectangle, in pixels.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-dbg-g-chip-info.rst b/Documentation/userspace-api/media/v4l/vidioc-dbg-g-chip-info.rst
index a2541329b30a..bde6e952b267 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-dbg-g-chip-info.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-dbg-g-chip-info.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_DBG_G_CHIP_INFO:
@@ -11,24 +12,22 @@ Name
VIDIOC_DBG_G_CHIP_INFO - Identify the chips on a TV card
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_DBG_G_CHIP_INFO, struct v4l2_dbg_chip_info *argp )
- :name: VIDIOC_DBG_G_CHIP_INFO
+.. c:macro:: VIDIOC_DBG_G_CHIP_INFO
+``int ioctl(int fd, VIDIOC_DBG_G_CHIP_INFO, struct v4l2_dbg_chip_info *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_dbg_chip_info`.
-
Description
===========
@@ -76,7 +75,6 @@ is available from the LinuxTV v4l-dvb repository; see
`https://linuxtv.org/repo/ <https://linuxtv.org/repo/>`__ for access
instructions.
-
.. tabularcolumns:: |p{3.5cm}|p{3.5cm}|p{3.5cm}|p{7.0cm}|
.. _name-v4l2-dbg-match:
@@ -103,7 +101,6 @@ instructions.
-
-
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
.. c:type:: v4l2_dbg_chip_info
@@ -130,7 +127,6 @@ instructions.
- Reserved fields, both application and driver must set these to 0.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _name-chip-match-types:
@@ -148,7 +144,6 @@ instructions.
- 4
- Match the nth sub-device.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-dbg-g-register.rst b/Documentation/userspace-api/media/v4l/vidioc-dbg-g-register.rst
index 350a04eb0e86..e1a6abe705bd 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-dbg-g-register.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-dbg-g-register.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_DBG_G_REGISTER:
@@ -11,27 +12,26 @@ Name
VIDIOC_DBG_G_REGISTER - VIDIOC_DBG_S_REGISTER - Read or write hardware registers
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_DBG_G_REGISTER, struct v4l2_dbg_register *argp )
- :name: VIDIOC_DBG_G_REGISTER
+.. c:macro:: VIDIOC_DBG_G_REGISTER
+
+``int ioctl(int fd, VIDIOC_DBG_G_REGISTER, struct v4l2_dbg_register *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_DBG_S_REGISTER, const struct v4l2_dbg_register *argp )
- :name: VIDIOC_DBG_S_REGISTER
+.. c:macro:: VIDIOC_DBG_S_REGISTER
+``int ioctl(int fd, VIDIOC_DBG_S_REGISTER, const struct v4l2_dbg_register *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_dbg_register`.
-
Description
===========
@@ -85,7 +85,6 @@ It is available from the LinuxTV v4l-dvb repository; see
`https://linuxtv.org/repo/ <https://linuxtv.org/repo/>`__ for access
instructions.
-
.. tabularcolumns:: |p{3.5cm}|p{3.5cm}|p{3.5cm}|p{7.0cm}|
.. c:type:: v4l2_dbg_match
@@ -112,7 +111,6 @@ instructions.
-
-
.. c:type:: v4l2_dbg_register
.. flat-table:: struct v4l2_dbg_register
@@ -133,7 +131,6 @@ instructions.
- The value read from, or to be written into the register.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _chip-match-types:
@@ -151,7 +148,6 @@ instructions.
- 4
- Match the nth sub-device.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-decoder-cmd.rst b/Documentation/userspace-api/media/v4l/vidioc-decoder-cmd.rst
index 0ef757f061e3..fd71ceece037 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-decoder-cmd.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-decoder-cmd.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_DECODER_CMD:
@@ -11,28 +12,26 @@ Name
VIDIOC_DECODER_CMD - VIDIOC_TRY_DECODER_CMD - Execute an decoder command
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_DECODER_CMD, struct v4l2_decoder_cmd *argp )
- :name: VIDIOC_DECODER_CMD
+.. c:macro:: VIDIOC_DECODER_CMD
+``int ioctl(int fd, VIDIOC_DECODER_CMD, struct v4l2_decoder_cmd *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_TRY_DECODER_CMD, struct v4l2_decoder_cmd *argp )
- :name: VIDIOC_TRY_DECODER_CMD
+.. c:macro:: VIDIOC_TRY_DECODER_CMD
+``int ioctl(int fd, VIDIOC_TRY_DECODER_CMD, struct v4l2_decoder_cmd *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
pointer to struct :c:type:`v4l2_decoder_cmd`.
-
Description
===========
@@ -47,11 +46,11 @@ this structure.
The ``cmd`` field must contain the command code. Some commands use the
``flags`` field for additional information.
-A :ref:`write() <func-write>` or :ref:`VIDIOC_STREAMON`
+A :c:func:`write()` or :ref:`VIDIOC_STREAMON`
call sends an implicit START command to the decoder if it has not been
started yet. Applies to both queues of mem2mem decoders.
-A :ref:`close() <func-close>` or :ref:`VIDIOC_STREAMOFF <VIDIOC_STREAMON>`
+A :c:func:`close()` or :ref:`VIDIOC_STREAMOFF <VIDIOC_STREAMON>`
call of a streaming file descriptor sends an implicit immediate STOP
command to the decoder, and all buffered data is discarded. Applies to both
queues of mem2mem decoders.
@@ -60,7 +59,6 @@ In principle, these ioctls are optional, not all drivers may support them. They
introduced in Linux 3.3. They are, however, mandatory for stateful mem2mem decoders
(as further documented in :ref:`decoder`).
-
.. tabularcolumns:: |p{1.1cm}|p{2.4cm}|p{1.2cm}|p{1.6cm}|p{10.6cm}|
.. c:type:: v4l2_decoder_cmd
@@ -131,7 +129,6 @@ introduced in Linux 3.3. They are, however, mandatory for stateful mem2mem decod
-
-
.. tabularcolumns:: |p{5.6cm}|p{0.6cm}|p{11.3cm}|
.. _decoder-cmds:
diff --git a/Documentation/userspace-api/media/v4l/vidioc-dqevent.rst b/Documentation/userspace-api/media/v4l/vidioc-dqevent.rst
index f0dfc8cf9b14..634af717c8ba 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-dqevent.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-dqevent.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_DQEVENT:
@@ -11,24 +12,22 @@ Name
VIDIOC_DQEVENT - Dequeue event
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_DQEVENT, struct v4l2_event *argp )
- :name: VIDIOC_DQEVENT
+.. c:macro:: VIDIOC_DQEVENT
+``int ioctl(int fd, VIDIOC_DQEVENT, struct v4l2_event *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_event`.
-
Description
===========
@@ -38,7 +37,6 @@ structure are filled by the driver. The file handle will also receive
exceptions which the application may get by e.g. using the select system
call.
-
.. tabularcolumns:: |p{3.0cm}|p{4.4cm}|p{2.4cm}|p{7.7cm}|
.. c:type:: v4l2_event
@@ -100,7 +98,6 @@ call.
zero.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. cssclass:: longtable
@@ -191,7 +188,6 @@ call.
- Base event number for driver-private events.
-
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
.. c:type:: v4l2_event_vsync
@@ -206,7 +202,6 @@ call.
- The upcoming field. See enum :c:type:`v4l2_field`.
-
.. tabularcolumns:: |p{3.5cm}|p{3.0cm}|p{1.8cm}|p{8.5cm}|
.. c:type:: v4l2_event_ctrl
@@ -257,7 +252,6 @@ call.
:ref:`v4l2_queryctrl <v4l2-queryctrl>`.
-
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
.. c:type:: v4l2_event_frame_sync
@@ -272,7 +266,6 @@ call.
- The sequence number of the frame being received.
-
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
.. c:type:: v4l2_event_src_change
@@ -288,7 +281,6 @@ call.
:ref:`src-changes-flags`.
-
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
.. c:type:: v4l2_event_motion_det
@@ -318,7 +310,6 @@ call.
automatically assigned to the default region 0.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _ctrl-changes-flags:
@@ -344,7 +335,6 @@ call.
step or the default value of the control changed.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _src-changes-flags:
@@ -375,7 +365,6 @@ call.
loss of signal and so restarting streaming I/O is required in order for
the hardware to synchronize to the video signal.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-dv-timings-cap.rst b/Documentation/userspace-api/media/v4l/vidioc-dv-timings-cap.rst
index 82bb732893be..27bd6a83e42c 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-dv-timings-cap.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-dv-timings-cap.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_DV_TIMINGS_CAP:
@@ -11,27 +12,26 @@ Name
VIDIOC_DV_TIMINGS_CAP - VIDIOC_SUBDEV_DV_TIMINGS_CAP - The capabilities of the Digital Video receiver/transmitter
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_DV_TIMINGS_CAP, struct v4l2_dv_timings_cap *argp )
- :name: VIDIOC_DV_TIMINGS_CAP
+.. c:macro:: VIDIOC_DV_TIMINGS_CAP
+
+``int ioctl(int fd, VIDIOC_DV_TIMINGS_CAP, struct v4l2_dv_timings_cap *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_DV_TIMINGS_CAP, struct v4l2_dv_timings_cap *argp )
- :name: VIDIOC_SUBDEV_DV_TIMINGS_CAP
+.. c:macro:: VIDIOC_SUBDEV_DV_TIMINGS_CAP
+``int ioctl(int fd, VIDIOC_SUBDEV_DV_TIMINGS_CAP, struct v4l2_dv_timings_cap *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_dv_timings_cap`.
-
Description
===========
@@ -55,7 +55,6 @@ the desired pad number in the struct
zero the ``reserved`` array. Attempts to query capabilities on a pad
that doesn't support them will return an ``EINVAL`` error code.
-
.. tabularcolumns:: |p{1.2cm}|p{3.0cm}|p{13.3cm}|
.. c:type:: v4l2_bt_timings_cap
@@ -97,7 +96,6 @@ that doesn't support them will return an ``EINVAL`` error code.
Drivers must set the array to zero.
-
.. tabularcolumns:: |p{1.0cm}|p{4.0cm}|p{3.5cm}|p{9.2cm}|
.. c:type:: v4l2_dv_timings_cap
@@ -153,7 +151,6 @@ that doesn't support them will return an ``EINVAL`` error code.
- Can support non-standard timings, i.e. timings not belonging to
the standards set in the ``standards`` field.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-encoder-cmd.rst b/Documentation/userspace-api/media/v4l/vidioc-encoder-cmd.rst
index 44aad55d9459..5673606711b4 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-encoder-cmd.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-encoder-cmd.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_ENCODER_CMD:
@@ -11,22 +12,22 @@ Name
VIDIOC_ENCODER_CMD - VIDIOC_TRY_ENCODER_CMD - Execute an encoder command
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_ENCODER_CMD, struct v4l2_encoder_cmd *argp )
- :name: VIDIOC_ENCODER_CMD
+.. c:macro:: VIDIOC_ENCODER_CMD
+
+``int ioctl(int fd, VIDIOC_ENCODER_CMD, struct v4l2_encoder_cmd *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_TRY_ENCODER_CMD, struct v4l2_encoder_cmd *argp )
- :name: VIDIOC_TRY_ENCODER_CMD
+.. c:macro:: VIDIOC_TRY_ENCODER_CMD
+``int ioctl(int fd, VIDIOC_TRY_ENCODER_CMD, struct v4l2_encoder_cmd *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_encoder_cmd`.
@@ -47,16 +48,16 @@ this structure.
The ``cmd`` field must contain the command code. Some commands use the
``flags`` field for additional information.
-After a STOP command, :ref:`read() <func-read>` calls will read
+After a STOP command, :c:func:`read()` calls will read
the remaining data buffered by the driver. When the buffer is empty,
-:ref:`read() <func-read>` will return zero and the next :ref:`read() <func-read>`
+:c:func:`read()` will return zero and the next :c:func:`read()`
call will restart the encoder.
-A :ref:`read() <func-read>` or :ref:`VIDIOC_STREAMON <VIDIOC_STREAMON>`
+A :c:func:`read()` or :ref:`VIDIOC_STREAMON <VIDIOC_STREAMON>`
call sends an implicit START command to the encoder if it has not been
started yet. Applies to both queues of mem2mem encoders.
-A :ref:`close() <func-close>` or :ref:`VIDIOC_STREAMOFF <VIDIOC_STREAMON>`
+A :c:func:`close()` or :ref:`VIDIOC_STREAMOFF <VIDIOC_STREAMON>`
call of a streaming file descriptor sends an implicit immediate STOP to
the encoder, and all buffered data is discarded. Applies to both queues of
mem2mem encoders.
@@ -65,7 +66,6 @@ These ioctls are optional, not all drivers may support them. They were
introduced in Linux 2.6.21. They are, however, mandatory for stateful mem2mem
encoders (as further documented in :ref:`encoder`).
-
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
.. c:type:: v4l2_encoder_cmd
@@ -89,7 +89,6 @@ encoders (as further documented in :ref:`encoder`).
the array to zero.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _encoder-cmds:
@@ -134,7 +133,6 @@ encoders (as further documented in :ref:`encoder`).
the encoder is already running, this command does nothing. No
flags are defined for this command.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _encoder-flags:
@@ -151,7 +149,6 @@ encoders (as further documented in :ref:`encoder`).
Does not apply to :ref:`encoder`.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-enum-dv-timings.rst b/Documentation/userspace-api/media/v4l/vidioc-enum-dv-timings.rst
index bb74096cfbd8..20730cd4f6ef 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-enum-dv-timings.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-enum-dv-timings.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_ENUM_DV_TIMINGS:
@@ -11,27 +12,26 @@ Name
VIDIOC_ENUM_DV_TIMINGS - VIDIOC_SUBDEV_ENUM_DV_TIMINGS - Enumerate supported Digital Video timings
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_ENUM_DV_TIMINGS, struct v4l2_enum_dv_timings *argp )
- :name: VIDIOC_ENUM_DV_TIMINGS
+.. c:macro:: VIDIOC_ENUM_DV_TIMINGS
+
+``int ioctl(int fd, VIDIOC_ENUM_DV_TIMINGS, struct v4l2_enum_dv_timings *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_ENUM_DV_TIMINGS, struct v4l2_enum_dv_timings *argp )
- :name: VIDIOC_SUBDEV_ENUM_DV_TIMINGS
+.. c:macro:: VIDIOC_SUBDEV_ENUM_DV_TIMINGS
+``int ioctl(int fd, VIDIOC_SUBDEV_ENUM_DV_TIMINGS, struct v4l2_enum_dv_timings *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_enum_dv_timings`.
-
Description
===========
@@ -65,7 +65,6 @@ pad number in the struct
Attempts to enumerate timings on a pad that doesn't support them will
return an ``EINVAL`` error code.
-
.. c:type:: v4l2_enum_dv_timings
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
@@ -91,7 +90,6 @@ return an ``EINVAL`` error code.
- ``timings``
- The timings.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-enum-fmt.rst b/Documentation/userspace-api/media/v4l/vidioc-enum-fmt.rst
index b8347a96a554..2b3fa9c23146 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-enum-fmt.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-enum-fmt.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_ENUM_FMT:
@@ -11,24 +12,22 @@ Name
VIDIOC_ENUM_FMT - Enumerate image formats
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_ENUM_FMT, struct v4l2_fmtdesc *argp )
- :name: VIDIOC_ENUM_FMT
+.. c:macro:: VIDIOC_ENUM_FMT
+``int ioctl(int fd, VIDIOC_ENUM_FMT, struct v4l2_fmtdesc *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_fmtdesc`.
-
Description
===========
@@ -72,7 +71,6 @@ the ``mbus_code`` field is handled differently:
formats shall not depend on the active configuration of the video device
or device pipeline.
-
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
.. c:type:: v4l2_fmtdesc
@@ -137,7 +135,6 @@ the ``mbus_code`` field is handled differently:
zero.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _fmtdesc-flags:
@@ -227,7 +224,6 @@ the ``mbus_code`` field is handled differently:
device when calling the :ref:`VIDIOC_S_FMT <VIDIOC_G_FMT>` ioctl with
:ref:`V4L2_PIX_FMT_FLAG_SET_CSC <v4l2-pix-fmt-flag-set-csc>` set.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-enum-frameintervals.rst b/Documentation/userspace-api/media/v4l/vidioc-enum-frameintervals.rst
index 68469756e6c7..1f0949726045 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-enum-frameintervals.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-enum-frameintervals.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_ENUM_FRAMEINTERVALS:
@@ -11,25 +12,23 @@ Name
VIDIOC_ENUM_FRAMEINTERVALS - Enumerate frame intervals
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_ENUM_FRAMEINTERVALS, struct v4l2_frmivalenum *argp )
- :name: VIDIOC_ENUM_FRAMEINTERVALS
+.. c:macro:: VIDIOC_ENUM_FRAMEINTERVALS
+``int ioctl(int fd, VIDIOC_ENUM_FRAMEINTERVALS, struct v4l2_frmivalenum *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_frmivalenum`
that contains a pixel format and size and receives a frame interval.
-
Description
===========
@@ -91,7 +90,6 @@ other ioctl calls while it runs the frame interval enumeration.
frame_rate = 1 / frame_interval
-
Structs
=======
@@ -99,7 +97,6 @@ In the structs below, *IN* denotes a value that has to be filled in by
the application, *OUT* denotes values that the driver fills in. The
application should zero out all members except for the *IN* fields.
-
.. c:type:: v4l2_frmival_stepwise
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
@@ -120,7 +117,6 @@ application should zero out all members except for the *IN* fields.
- Frame interval step size [s].
-
.. c:type:: v4l2_frmivalenum
.. tabularcolumns:: |p{1.8cm}|p{4.4cm}|p{2.4cm}|p{8.9cm}|
@@ -163,11 +159,9 @@ application should zero out all members except for the *IN* fields.
applications.
-
Enums
=====
-
.. c:type:: v4l2_frmivaltypes
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
@@ -187,7 +181,6 @@ Enums
- 3
- Step-wise defined frame interval.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-enum-framesizes.rst b/Documentation/userspace-api/media/v4l/vidioc-enum-framesizes.rst
index dc4e0e216e7e..c9a36bcf699f 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-enum-framesizes.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-enum-framesizes.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_ENUM_FRAMESIZES:
@@ -11,26 +12,24 @@ Name
VIDIOC_ENUM_FRAMESIZES - Enumerate frame sizes
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_ENUM_FRAMESIZES, struct v4l2_frmsizeenum *argp )
- :name: VIDIOC_ENUM_FRAMESIZES
+.. c:macro:: VIDIOC_ENUM_FRAMESIZES
+``int ioctl(int fd, VIDIOC_ENUM_FRAMESIZES, struct v4l2_frmsizeenum *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_frmsizeenum`
that contains an index and pixel format and receives a frame width
and height.
-
Description
===========
@@ -81,7 +80,6 @@ without any interaction from the application itself. This means that the
enumeration data is consistent if the application does not perform any
other ioctl calls while it runs the frame size enumeration.
-
Structs
=======
@@ -89,7 +87,6 @@ In the structs below, *IN* denotes a value that has to be filled in by
the application, *OUT* denotes values that the driver fills in. The
application should zero out all members except for the *IN* fields.
-
.. c:type:: v4l2_frmsize_discrete
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
@@ -107,7 +104,6 @@ application should zero out all members except for the *IN* fields.
- Height of the frame [pixel].
-
.. c:type:: v4l2_frmsize_stepwise
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
@@ -137,7 +133,6 @@ application should zero out all members except for the *IN* fields.
- Frame height step size [pixel].
-
.. c:type:: v4l2_frmsizeenum
.. tabularcolumns:: |p{1.4cm}|p{5.9cm}|p{2.3cm}|p{8.0cm}|
@@ -173,11 +168,9 @@ application should zero out all members except for the *IN* fields.
applications.
-
Enums
=====
-
.. c:type:: v4l2_frmsizetypes
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
@@ -197,7 +190,6 @@ Enums
- 3
- Step-wise defined frame size.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-enum-freq-bands.rst b/Documentation/userspace-api/media/v4l/vidioc-enum-freq-bands.rst
index 2dabf542b20f..a0764fca8d18 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-enum-freq-bands.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-enum-freq-bands.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_ENUM_FREQ_BANDS:
@@ -11,24 +12,22 @@ Name
VIDIOC_ENUM_FREQ_BANDS - Enumerate supported frequency bands
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_ENUM_FREQ_BANDS, struct v4l2_frequency_band *argp )
- :name: VIDIOC_ENUM_FREQ_BANDS
+.. c:macro:: VIDIOC_ENUM_FREQ_BANDS
+``int ioctl(int fd, VIDIOC_ENUM_FREQ_BANDS, struct v4l2_frequency_band *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_frequency_band`.
-
Description
===========
@@ -41,7 +40,6 @@ fields, and zero out the ``reserved`` array of a struct
This ioctl is supported if the ``V4L2_TUNER_CAP_FREQ_BANDS`` capability
of the corresponding tuner/modulator is set.
-
.. tabularcolumns:: |p{2.9cm}|p{2.9cm}|p{5.8cm}|p{2.9cm}|p{3.0cm}|
.. c:type:: v4l2_frequency_band
@@ -110,7 +108,6 @@ of the corresponding tuner/modulator is set.
Applications and drivers must set the array to zero.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _band-modulation:
@@ -130,7 +127,6 @@ of the corresponding tuner/modulator is set.
- 0x08
- Amplitude Modulation, commonly used for analog radio.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-enumaudio.rst b/Documentation/userspace-api/media/v4l/vidioc-enumaudio.rst
index 6cf06ac0bb70..7873e5434d3e 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-enumaudio.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-enumaudio.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_ENUMAUDIO:
@@ -11,24 +12,22 @@ Name
VIDIOC_ENUMAUDIO - Enumerate audio inputs
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_ENUMAUDIO, struct v4l2_audio *argp )
- :name: VIDIOC_ENUMAUDIO
+.. c:macro:: VIDIOC_ENUMAUDIO
+``int ioctl(int fd, VIDIOC_ENUMAUDIO, struct v4l2_audio *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_audio`.
-
Description
===========
@@ -43,7 +42,6 @@ zero, incrementing by one until the driver returns ``EINVAL``.
See :ref:`VIDIOC_G_AUDIO <VIDIOC_G_AUDIO>` for a description of struct
:c:type:`v4l2_audio`.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-enumaudioout.rst b/Documentation/userspace-api/media/v4l/vidioc-enumaudioout.rst
index b4a42ea397db..d4c3ba320834 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-enumaudioout.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-enumaudioout.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_ENUMAUDOUT:
@@ -11,24 +12,22 @@ Name
VIDIOC_ENUMAUDOUT - Enumerate audio outputs
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_ENUMAUDOUT, struct v4l2_audioout *argp )
- :name: VIDIOC_ENUMAUDOUT
+.. c:macro:: VIDIOC_ENUMAUDOUT
+``int ioctl(int fd, VIDIOC_ENUMAUDOUT, struct v4l2_audioout *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_audioout`.
-
Description
===========
@@ -48,7 +47,6 @@ zero, incrementing by one until the driver returns ``EINVAL``.
See :ref:`VIDIOC_G_AUDIOout <VIDIOC_G_AUDOUT>` for a description of struct
:c:type:`v4l2_audioout`.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-enuminput.rst b/Documentation/userspace-api/media/v4l/vidioc-enuminput.rst
index 714688f81e23..0f62e681a827 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-enuminput.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-enuminput.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_ENUMINPUT:
@@ -11,24 +12,22 @@ Name
VIDIOC_ENUMINPUT - Enumerate video inputs
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_ENUMINPUT, struct v4l2_input *argp )
- :name: VIDIOC_ENUMINPUT
+.. c:macro:: VIDIOC_ENUMINPUT
+``int ioctl(int fd, VIDIOC_ENUMINPUT, struct v4l2_input *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_input`.
-
Description
===========
@@ -39,7 +38,6 @@ fill the rest of the structure or return an ``EINVAL`` error code when the
index is out of bounds. To enumerate all inputs applications shall begin
at index zero, incrementing by one until the driver returns ``EINVAL``.
-
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
.. c:type:: v4l2_input
@@ -103,7 +101,6 @@ at index zero, incrementing by one until the driver returns ``EINVAL``.
zero.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _input-type:
@@ -126,7 +123,6 @@ at index zero, incrementing by one until the driver returns ``EINVAL``.
- This input is a touch device for capturing raw touch data.
-
.. tabularcolumns:: |p{4.8cm}|p{2.6cm}|p{10.1cm}|
.. _input-status:
@@ -198,7 +194,6 @@ at index zero, incrementing by one until the driver returns ``EINVAL``.
- VTR time constant. [?]
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _input-capabilities:
@@ -222,7 +217,6 @@ at index zero, incrementing by one until the driver returns ``EINVAL``.
``V4L2_SEL_TGT_NATIVE_SIZE`` selection target, see
:ref:`v4l2-selections-common`.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-enumoutput.rst b/Documentation/userspace-api/media/v4l/vidioc-enumoutput.rst
index 272a0b2b475c..91fcf99094d2 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-enumoutput.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-enumoutput.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_ENUMOUTPUT:
@@ -11,24 +12,22 @@ Name
VIDIOC_ENUMOUTPUT - Enumerate video outputs
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_ENUMOUTPUT, struct v4l2_output *argp )
- :name: VIDIOC_ENUMOUTPUT
+.. c:macro:: VIDIOC_ENUMOUTPUT
+``int ioctl(int fd, VIDIOC_ENUMOUTPUT, struct v4l2_output *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_output`.
-
Description
===========
@@ -40,7 +39,6 @@ when the index is out of bounds. To enumerate all outputs applications
shall begin at index zero, incrementing by one until the driver returns
``EINVAL``.
-
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
.. c:type:: v4l2_output
@@ -98,7 +96,6 @@ shall begin at index zero, incrementing by one until the driver returns
zero.
-
.. tabularcolumns:: |p{7.0cm}|p{1.8cm}|p{8.7cm}|
.. _output-type:
@@ -121,7 +118,6 @@ shall begin at index zero, incrementing by one until the driver returns
- The video output will be copied to a :ref:`video overlay <overlay>`.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _output-capabilities:
@@ -145,7 +141,6 @@ shall begin at index zero, incrementing by one until the driver returns
``V4L2_SEL_TGT_NATIVE_SIZE`` selection target, see
:ref:`v4l2-selections-common`.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-enumstd.rst b/Documentation/userspace-api/media/v4l/vidioc-enumstd.rst
index 85bc6d0231f1..b5704e8cf909 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-enumstd.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-enumstd.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_ENUMSTD:
@@ -11,27 +12,26 @@ Name
VIDIOC_ENUMSTD - VIDIOC_SUBDEV_ENUMSTD - Enumerate supported video standards
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_ENUMSTD, struct v4l2_standard *argp )
- :name: VIDIOC_ENUMSTD
+.. c:macro:: VIDIOC_ENUMSTD
+
+``int ioctl(int fd, VIDIOC_ENUMSTD, struct v4l2_standard *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_ENUMSTD, struct v4l2_standard *argp )
- :name: VIDIOC_SUBDEV_ENUMSTD
+.. c:macro:: VIDIOC_SUBDEV_ENUMSTD
+``int ioctl(int fd, VIDIOC_SUBDEV_ENUMSTD, struct v4l2_standard *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_standard`.
-
Description
===========
@@ -45,7 +45,6 @@ zero, incrementing by one until the driver returns ``EINVAL``. Drivers may
enumerate a different set of standards after switching the video input
or output. [#f1]_
-
.. c:type:: v4l2_standard
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
@@ -85,7 +84,6 @@ or output. [#f1]_
zero.
-
.. c:type:: v4l2_fract
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
@@ -102,7 +100,6 @@ or output. [#f1]_
- ``denominator``
-
-
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
.. _v4l2-std-id:
@@ -120,7 +117,6 @@ or output. [#f1]_
standards.
-
.. code-block:: c
#define V4L2_STD_PAL_B ((v4l2_std_id)0x00000001)
@@ -142,7 +138,6 @@ rate, and PAL color modulation with a 4.43 MHz color subcarrier. Some
PAL video recorders can play back NTSC tapes in this mode for display on
a 50/60 Hz agnostic PAL TV.
-
.. code-block:: c
#define V4L2_STD_NTSC_M ((v4l2_std_id)0x00001000)
@@ -152,7 +147,6 @@ a 50/60 Hz agnostic PAL TV.
``V4L2_STD_NTSC_443`` is a hybrid standard with 525 lines, 60 Hz refresh
rate, and NTSC color modulation with a 4.43 MHz color subcarrier.
-
.. code-block:: c
#define V4L2_STD_NTSC_M_KR ((v4l2_std_id)0x00008000)
@@ -175,7 +169,6 @@ terrestrial digital TV standards. Presently the V4L2 API does not
support digital TV. See also the Linux DVB API at
`https://linuxtv.org <https://linuxtv.org>`__.
-
.. code-block:: c
#define V4L2_STD_PAL_BG (V4L2_STD_PAL_B |
@@ -228,7 +221,6 @@ support digital TV. See also the Linux DVB API at
#define V4L2_STD_ALL (V4L2_STD_525_60 |
V4L2_STD_625_50)
-
.. raw:: latex
\begingroup
@@ -303,7 +295,6 @@ support digital TV. See also the Linux DVB API at
\endgroup
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-expbuf.rst b/Documentation/userspace-api/media/v4l/vidioc-expbuf.rst
index a2c475a83a58..212377c90442 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-expbuf.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-expbuf.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_EXPBUF:
@@ -11,24 +12,22 @@ Name
VIDIOC_EXPBUF - Export a buffer as a DMABUF file descriptor.
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_EXPBUF, struct v4l2_exportbuffer *argp )
- :name: VIDIOC_EXPBUF
+.. c:macro:: VIDIOC_EXPBUF
+``int ioctl(int fd, VIDIOC_EXPBUF, struct v4l2_exportbuffer *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_exportbuffer`.
-
Description
===========
@@ -63,11 +62,9 @@ for details about importing DMABUF files into V4L2 nodes. It is
recommended to close a DMABUF file when it is no longer used to allow
the associated memory to be reclaimed.
-
Examples
========
-
.. code-block:: c
int buffer_export(int v4lfd, enum v4l2_buf_type bt, int index, int *dmafd)
@@ -87,7 +84,6 @@ Examples
return 0;
}
-
.. code-block:: c
int buffer_export_mp(int v4lfd, enum v4l2_buf_type bt, int index,
@@ -114,7 +110,6 @@ Examples
return 0;
}
-
.. c:type:: v4l2_exportbuffer
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
@@ -155,7 +150,6 @@ Examples
- Reserved field for future use. Drivers and applications must set
the array to zero.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-g-audio.rst b/Documentation/userspace-api/media/v4l/vidioc-g-audio.rst
index 38667864355a..4c93bd55bd97 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-g-audio.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-g-audio.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_G_AUDIO:
@@ -11,27 +12,26 @@ Name
VIDIOC_G_AUDIO - VIDIOC_S_AUDIO - Query or select the current audio input and its attributes
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_G_AUDIO, struct v4l2_audio *argp )
- :name: VIDIOC_G_AUDIO
+.. c:macro:: VIDIOC_G_AUDIO
+
+``int ioctl(int fd, VIDIOC_G_AUDIO, struct v4l2_audio *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_S_AUDIO, const struct v4l2_audio *argp )
- :name: VIDIOC_S_AUDIO
+.. c:macro:: VIDIOC_S_AUDIO
+``int ioctl(int fd, VIDIOC_S_AUDIO, const struct v4l2_audio *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_audio`.
-
Description
===========
@@ -49,7 +49,6 @@ ioctl. Drivers may switch to a different audio mode if the request
cannot be satisfied. However, this is a write-only ioctl, it does not
return the actual new audio mode.
-
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
.. c:type:: v4l2_audio
@@ -80,7 +79,6 @@ return the actual new audio mode.
the array to zero.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _audio-capability:
@@ -101,7 +99,6 @@ return the actual new audio mode.
- Automatic Volume Level mode is supported.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _audio-mode:
@@ -115,7 +112,6 @@ return the actual new audio mode.
- 0x00001
- AVL mode is on.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-g-audioout.rst b/Documentation/userspace-api/media/v4l/vidioc-g-audioout.rst
index 5bf56723c7ba..194f22493517 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-g-audioout.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-g-audioout.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_G_AUDOUT:
@@ -11,27 +12,26 @@ Name
VIDIOC_G_AUDOUT - VIDIOC_S_AUDOUT - Query or select the current audio output
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_G_AUDOUT, struct v4l2_audioout *argp )
- :name: VIDIOC_G_AUDOUT
+.. c:macro:: VIDIOC_G_AUDOUT
+
+``int ioctl(int fd, VIDIOC_G_AUDOUT, struct v4l2_audioout *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_S_AUDOUT, const struct v4l2_audioout *argp )
- :name: VIDIOC_S_AUDOUT
+.. c:macro:: VIDIOC_S_AUDOUT
+``int ioctl(int fd, VIDIOC_S_AUDOUT, const struct v4l2_audioout *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_audioout`.
-
Description
===========
@@ -56,7 +56,6 @@ as ``VIDIOC_G_AUDOUT`` does.
Connectors on a TV card to loop back the received audio signal
to a sound card are not audio outputs in this sense.
-
.. c:type:: v4l2_audioout
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
@@ -87,7 +86,6 @@ as ``VIDIOC_G_AUDOUT`` does.
- Reserved for future extensions. Drivers and applications must set
the array to zero.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-g-crop.rst b/Documentation/userspace-api/media/v4l/vidioc-g-crop.rst
index 735a6bf5e025..0ac1509e41cc 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-g-crop.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-g-crop.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_G_CROP:
@@ -11,27 +12,26 @@ Name
VIDIOC_G_CROP - VIDIOC_S_CROP - Get or set the current cropping rectangle
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_G_CROP, struct v4l2_crop *argp )
- :name: VIDIOC_G_CROP
+.. c:macro:: VIDIOC_G_CROP
+
+``int ioctl(int fd, VIDIOC_G_CROP, struct v4l2_crop *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_S_CROP, const struct v4l2_crop *argp )
- :name: VIDIOC_S_CROP
+.. c:macro:: VIDIOC_S_CROP
+``int ioctl(int fd, VIDIOC_S_CROP, const struct v4l2_crop *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_crop`.
-
Description
===========
@@ -69,7 +69,6 @@ been negotiated.
When cropping is not supported then no parameters are changed and
:ref:`VIDIOC_S_CROP <VIDIOC_G_CROP>` returns the ``EINVAL`` error code.
-
.. c:type:: v4l2_crop
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
@@ -100,7 +99,6 @@ When cropping is not supported then no parameters are changed and
Starting with kernel 4.13 both variations are allowed.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-g-ctrl.rst b/Documentation/userspace-api/media/v4l/vidioc-g-ctrl.rst
index d863c849be95..4f1bed53fad5 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-g-ctrl.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-g-ctrl.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_G_CTRL:
@@ -11,27 +12,26 @@ Name
VIDIOC_G_CTRL - VIDIOC_S_CTRL - Get or set the value of a control
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_G_CTRL, struct v4l2_control *argp )
- :name: VIDIOC_G_CTRL
+.. c:macro:: VIDIOC_G_CTRL
+
+``int ioctl(int fd, VIDIOC_G_CTRL, struct v4l2_control *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_S_CTRL, struct v4l2_control *argp )
- :name: VIDIOC_S_CTRL
+.. c:macro:: VIDIOC_S_CTRL
+``int ioctl(int fd, VIDIOC_S_CTRL, struct v4l2_control *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_control`.
-
Description
===========
@@ -55,7 +55,6 @@ These ioctls work only with user controls. For other control classes the
:ref:`VIDIOC_S_EXT_CTRLS <VIDIOC_G_EXT_CTRLS>` or
:ref:`VIDIOC_TRY_EXT_CTRLS <VIDIOC_G_EXT_CTRLS>` must be used.
-
.. c:type:: v4l2_control
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
@@ -72,7 +71,6 @@ These ioctls work only with user controls. For other control classes the
- ``value``
- New value or current value.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-g-dv-timings.rst b/Documentation/userspace-api/media/v4l/vidioc-g-dv-timings.rst
index e5a58db574d4..760a33d43b7d 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-g-dv-timings.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-g-dv-timings.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_G_DV_TIMINGS:
@@ -11,33 +12,34 @@ Name
VIDIOC_G_DV_TIMINGS - VIDIOC_S_DV_TIMINGS - VIDIOC_SUBDEV_G_DV_TIMINGS - VIDIOC_SUBDEV_S_DV_TIMINGS - Get or set DV timings for input or output
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_G_DV_TIMINGS, struct v4l2_dv_timings *argp )
- :name: VIDIOC_G_DV_TIMINGS
+.. c:macro:: VIDIOC_G_DV_TIMINGS
+
+``int ioctl(int fd, VIDIOC_G_DV_TIMINGS, struct v4l2_dv_timings *argp)``
+
+.. c:macro:: VIDIOC_S_DV_TIMINGS
+
+``int ioctl(int fd, VIDIOC_S_DV_TIMINGS, struct v4l2_dv_timings *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_S_DV_TIMINGS, struct v4l2_dv_timings *argp )
- :name: VIDIOC_S_DV_TIMINGS
+.. c:macro:: VIDIOC_SUBDEV_G_DV_TIMINGS
-.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_G_DV_TIMINGS, struct v4l2_dv_timings *argp )
- :name: VIDIOC_SUBDEV_G_DV_TIMINGS
+``int ioctl(int fd, VIDIOC_SUBDEV_G_DV_TIMINGS, struct v4l2_dv_timings *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_S_DV_TIMINGS, struct v4l2_dv_timings *argp )
- :name: VIDIOC_SUBDEV_S_DV_TIMINGS
+.. c:macro:: VIDIOC_SUBDEV_S_DV_TIMINGS
+``int ioctl(int fd, VIDIOC_SUBDEV_S_DV_TIMINGS, struct v4l2_dv_timings *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_dv_timings`.
-
Description
===========
@@ -60,7 +62,6 @@ the current input or output does not support DV timings (e.g. if
:ref:`VIDIOC_ENUMINPUT` does not set the
``V4L2_IN_CAP_DV_TIMINGS`` flag), then ``ENODATA`` error code is returned.
-
Return Value
============
@@ -170,7 +171,6 @@ EPERM
- Reserved for future extensions. Drivers and applications must set
the array to zero.
-
.. tabularcolumns:: |p{3.5cm}|p{3.5cm}|p{7.0cm}|p{3.5cm}|
.. c:type:: v4l2_dv_timings
diff --git a/Documentation/userspace-api/media/v4l/vidioc-g-edid.rst b/Documentation/userspace-api/media/v4l/vidioc-g-edid.rst
index 6a9ed2915cb9..39d523a449a7 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-g-edid.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-g-edid.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_G_EDID:
@@ -11,34 +12,34 @@ Name
VIDIOC_G_EDID - VIDIOC_S_EDID - VIDIOC_SUBDEV_G_EDID - VIDIOC_SUBDEV_S_EDID - Get or set the EDID of a video receiver/transmitter
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_G_EDID, struct v4l2_edid *argp )
- :name: VIDIOC_G_EDID
+.. c:macro:: VIDIOC_G_EDID
+
+``int ioctl(int fd, VIDIOC_G_EDID, struct v4l2_edid *argp)``
+
+.. c:macro:: VIDIOC_S_EDID
-.. c:function:: int ioctl( int fd, VIDIOC_S_EDID, struct v4l2_edid *argp )
- :name: VIDIOC_S_EDID
+``int ioctl(int fd, VIDIOC_S_EDID, struct v4l2_edid *argp)``
+.. c:macro:: VIDIOC_SUBDEV_G_EDID
-.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_G_EDID, struct v4l2_edid *argp )
- :name: VIDIOC_SUBDEV_G_EDID
+``int ioctl(int fd, VIDIOC_SUBDEV_G_EDID, struct v4l2_edid *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_S_EDID, struct v4l2_edid *argp )
- :name: VIDIOC_SUBDEV_S_EDID
+.. c:macro:: VIDIOC_SUBDEV_S_EDID
+``int ioctl(int fd, VIDIOC_SUBDEV_S_EDID, struct v4l2_edid *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_edid`.
-
Description
===========
@@ -97,7 +98,6 @@ this will drive the hotplug pin low and/or block the source from reading
the EDID data in some way. In any case, the end result is the same: the
EDID is no longer available.
-
.. c:type:: v4l2_edid
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
@@ -132,7 +132,6 @@ EDID is no longer available.
- Pointer to memory that contains the EDID. The minimum size is
``blocks`` * 128.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-g-enc-index.rst b/Documentation/userspace-api/media/v4l/vidioc-g-enc-index.rst
index 99cddf3b9888..7698e65ccccf 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-g-enc-index.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-g-enc-index.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_G_ENC_INDEX:
@@ -11,24 +12,22 @@ Name
VIDIOC_G_ENC_INDEX - Get meta data about a compressed video stream
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_G_ENC_INDEX, struct v4l2_enc_idx *argp )
- :name: VIDIOC_G_ENC_INDEX
+.. c:macro:: VIDIOC_G_ENC_INDEX
+``int ioctl(int fd, VIDIOC_G_ENC_INDEX, struct v4l2_enc_idx *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_enc_idx`.
-
Description
===========
@@ -55,7 +54,6 @@ will be zero.
Currently this ioctl is only defined for MPEG-2 program streams and
video elementary streams.
-
.. tabularcolumns:: |p{3.8cm}|p{5.6cm}|p{8.1cm}|
.. c:type:: v4l2_enc_idx
@@ -83,7 +81,6 @@ video elementary streams.
their ``offset``.
-
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
.. c:type:: v4l2_enc_idx_entry
@@ -116,7 +113,6 @@ video elementary streams.
- Reserved for future extensions. Drivers must set the array to
zero.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _enc-idx-flags:
@@ -140,7 +136,6 @@ video elementary streams.
- *AND* the flags field with this mask to obtain the picture coding
type.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-g-ext-ctrls.rst b/Documentation/userspace-api/media/v4l/vidioc-g-ext-ctrls.rst
index 0991af626591..f2173e310d67 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-g-ext-ctrls.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-g-ext-ctrls.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_G_EXT_CTRLS:
@@ -11,32 +12,30 @@ Name
VIDIOC_G_EXT_CTRLS - VIDIOC_S_EXT_CTRLS - VIDIOC_TRY_EXT_CTRLS - Get or set the value of several controls, try control values
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_G_EXT_CTRLS, struct v4l2_ext_controls *argp )
- :name: VIDIOC_G_EXT_CTRLS
+.. c:macro:: VIDIOC_G_EXT_CTRLS
+``int ioctl(int fd, VIDIOC_G_EXT_CTRLS, struct v4l2_ext_controls *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_S_EXT_CTRLS, struct v4l2_ext_controls *argp )
- :name: VIDIOC_S_EXT_CTRLS
+.. c:macro:: VIDIOC_S_EXT_CTRLS
+``int ioctl(int fd, VIDIOC_S_EXT_CTRLS, struct v4l2_ext_controls *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_TRY_EXT_CTRLS, struct v4l2_ext_controls *argp )
- :name: VIDIOC_TRY_EXT_CTRLS
+.. c:macro:: VIDIOC_TRY_EXT_CTRLS
+``int ioctl(int fd, VIDIOC_TRY_EXT_CTRLS, struct v4l2_ext_controls *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_ext_controls`.
-
Description
===========
@@ -119,7 +118,6 @@ correct. This prevents the situation where only some of the controls
were set/get. Only low-level errors (e. g. a failed i2c command) can
still cause this situation.
-
.. tabularcolumns:: |p{1.2cm}|p{3.0cm}|p{1.5cm}|p{11.8cm}|
.. c:type:: v4l2_ext_control
@@ -195,7 +193,6 @@ still cause this situation.
* - }
-
-
.. tabularcolumns:: |p{4.0cm}|p{2.2cm}|p{2.1cm}|p{8.2cm}|
.. c:type:: v4l2_ext_controls
@@ -309,7 +306,6 @@ still cause this situation.
Ignored if ``count`` equals zero.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _ctrl-class:
@@ -363,7 +359,6 @@ still cause this situation.
- The class containing RF tuner controls. These controls are
described in :ref:`rf-tuner-controls`.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-g-fbuf.rst b/Documentation/userspace-api/media/v4l/vidioc-g-fbuf.rst
index 7e1a0b812754..dc1f16343b22 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-g-fbuf.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-g-fbuf.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_G_FBUF:
@@ -11,27 +12,26 @@ Name
VIDIOC_G_FBUF - VIDIOC_S_FBUF - Get or set frame buffer overlay parameters
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_G_FBUF, struct v4l2_framebuffer *argp )
- :name: VIDIOC_G_FBUF
+.. c:macro:: VIDIOC_G_FBUF
+
+``int ioctl(int fd, VIDIOC_G_FBUF, struct v4l2_framebuffer *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_S_FBUF, const struct v4l2_framebuffer *argp )
- :name: VIDIOC_S_FBUF
+.. c:macro:: VIDIOC_S_FBUF
+``int ioctl(int fd, VIDIOC_S_FBUF, const struct v4l2_framebuffer *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_framebuffer`.
-
Description
===========
@@ -75,7 +75,6 @@ jeopardize the system security, its stability or even damage the
hardware, therefore only the superuser can set the parameters for a
destructive video overlay.
-
.. tabularcolumns:: |p{3.5cm}|p{3.5cm}|p{3.5cm}|p{7.0cm}|
.. c:type:: v4l2_framebuffer
@@ -208,7 +207,6 @@ destructive video overlay.
- ``priv``
- Reserved. Drivers and applications must set this field to zero.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _framebuffer-cap:
@@ -257,7 +255,6 @@ destructive video overlay.
chroma-key colors are replaced by framebuffer pixels, which is
exactly opposite of ``V4L2_FBUF_CAP_CHROMAKEY``
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _framebuffer-flags:
@@ -332,7 +329,6 @@ destructive video overlay.
other, so same ``chromakey`` field of struct
:c:type:`v4l2_window` is being used.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-g-fmt.rst b/Documentation/userspace-api/media/v4l/vidioc-g-fmt.rst
index 7d113451bfbc..7e9f8475ea63 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-g-fmt.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-g-fmt.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_G_FMT:
@@ -11,29 +12,30 @@ Name
VIDIOC_G_FMT - VIDIOC_S_FMT - VIDIOC_TRY_FMT - Get or set the data format, try a format
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_G_FMT, struct v4l2_format *argp )
- :name: VIDIOC_G_FMT
+.. c:macro:: VIDIOC_G_FMT
+
+``int ioctl(int fd, VIDIOC_G_FMT, struct v4l2_format *argp)``
+
+.. c:macro:: VIDIOC_S_FMT
+
+``int ioctl(int fd, VIDIOC_S_FMT, struct v4l2_format *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_S_FMT, struct v4l2_format *argp )
- :name: VIDIOC_S_FMT
+.. c:macro:: VIDIOC_TRY_FMT
-.. c:function:: int ioctl( int fd, VIDIOC_TRY_FMT, struct v4l2_format *argp )
- :name: VIDIOC_TRY_FMT
+``int ioctl(int fd, VIDIOC_TRY_FMT, struct v4l2_format *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_format`.
-
Description
===========
@@ -85,7 +87,6 @@ recommended drivers are not required to implement this ioctl.
The format as returned by :ref:`VIDIOC_TRY_FMT <VIDIOC_G_FMT>` must be identical to what
:ref:`VIDIOC_S_FMT <VIDIOC_G_FMT>` returns for the same input or output.
-
.. c:type:: v4l2_format
.. tabularcolumns:: |p{1.2cm}|p{4.6cm}|p{3.0cm}|p{8.6cm}|
@@ -135,7 +136,6 @@ The format as returned by :ref:`VIDIOC_TRY_FMT <VIDIOC_G_FMT>` must be identical
* - }
-
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-g-frequency.rst b/Documentation/userspace-api/media/v4l/vidioc-g-frequency.rst
index da0d5dee72ff..5445a4a442e4 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-g-frequency.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-g-frequency.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_G_FREQUENCY:
@@ -11,27 +12,26 @@ Name
VIDIOC_G_FREQUENCY - VIDIOC_S_FREQUENCY - Get or set tuner or modulator radio frequency
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_G_FREQUENCY, struct v4l2_frequency *argp )
- :name: VIDIOC_G_FREQUENCY
+.. c:macro:: VIDIOC_G_FREQUENCY
+
+``int ioctl(int fd, VIDIOC_G_FREQUENCY, struct v4l2_frequency *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_S_FREQUENCY, const struct v4l2_frequency *argp )
- :name: VIDIOC_S_FREQUENCY
+.. c:macro:: VIDIOC_S_FREQUENCY
+``int ioctl(int fd, VIDIOC_S_FREQUENCY, const struct v4l2_frequency *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_frequency`.
-
Description
===========
@@ -51,7 +51,6 @@ structure. When the requested frequency is not possible the driver
assumes the closest possible value. However :ref:`VIDIOC_S_FREQUENCY <VIDIOC_G_FREQUENCY>` is a
write-only ioctl, it does not return the actual new frequency.
-
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
.. c:type:: v4l2_frequency
@@ -89,7 +88,6 @@ write-only ioctl, it does not return the actual new frequency.
- Reserved for future extensions. Drivers and applications must set
the array to zero.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-g-input.rst b/Documentation/userspace-api/media/v4l/vidioc-g-input.rst
index f4637bc464f6..eee9ce51c797 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-g-input.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-g-input.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_G_INPUT:
@@ -11,27 +12,26 @@ Name
VIDIOC_G_INPUT - VIDIOC_S_INPUT - Query or select the current video input
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_G_INPUT, int *argp )
- :name: VIDIOC_G_INPUT
+.. c:macro:: VIDIOC_G_INPUT
+
+``int ioctl(int fd, VIDIOC_G_INPUT, int *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_S_INPUT, int *argp )
- :name: VIDIOC_S_INPUT
+.. c:macro:: VIDIOC_S_INPUT
+``int ioctl(int fd, VIDIOC_S_INPUT, int *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer an integer with input index.
-
Description
===========
@@ -52,7 +52,6 @@ other parameters.
Information about video inputs is available using the
:ref:`VIDIOC_ENUMINPUT` ioctl.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-g-jpegcomp.rst b/Documentation/userspace-api/media/v4l/vidioc-g-jpegcomp.rst
index 8721adc5ad70..93ed111dfcb9 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-g-jpegcomp.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-g-jpegcomp.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_G_JPEGCOMP:
@@ -11,27 +12,26 @@ Name
VIDIOC_G_JPEGCOMP - VIDIOC_S_JPEGCOMP
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_G_JPEGCOMP, v4l2_jpegcompression *argp )
- :name: VIDIOC_G_JPEGCOMP
+.. c:macro:: VIDIOC_G_JPEGCOMP
+
+``int ioctl(int fd, VIDIOC_G_JPEGCOMP, v4l2_jpegcompression *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_S_JPEGCOMP, const v4l2_jpegcompression *argp )
- :name: VIDIOC_S_JPEGCOMP
+.. c:macro:: VIDIOC_S_JPEGCOMP
+``int ioctl(int fd, VIDIOC_S_JPEGCOMP, const v4l2_jpegcompression *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_jpegcompression`.
-
Description
===========
@@ -54,7 +54,6 @@ stored in the JPEG-encoded fields. These define how the JPEG field is
encoded. If you omit them, applications assume you've used standard
encoding. You usually do want to add them.
-
.. tabularcolumns:: |p{1.2cm}|p{3.0cm}|p{13.3cm}|
.. c:type:: v4l2_jpegcompression
@@ -92,7 +91,6 @@ encoding. You usually do want to add them.
control is exposed by a driver applications should use it instead
and ignore this field.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _jpeg-markers:
@@ -118,7 +116,6 @@ encoding. You usually do want to add them.
- (1<<7)
- App segment, driver will always use APP0
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-g-modulator.rst b/Documentation/userspace-api/media/v4l/vidioc-g-modulator.rst
index baf499d0df74..2ac2473e341b 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-g-modulator.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-g-modulator.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_G_MODULATOR:
@@ -11,27 +12,26 @@ Name
VIDIOC_G_MODULATOR - VIDIOC_S_MODULATOR - Get or set modulator attributes
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_G_MODULATOR, struct v4l2_modulator *argp )
- :name: VIDIOC_G_MODULATOR
+.. c:macro:: VIDIOC_G_MODULATOR
+
+``int ioctl(int fd, VIDIOC_G_MODULATOR, struct v4l2_modulator *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_S_MODULATOR, const struct v4l2_modulator *argp )
- :name: VIDIOC_S_MODULATOR
+.. c:macro:: VIDIOC_S_MODULATOR
+``int ioctl(int fd, VIDIOC_S_MODULATOR, const struct v4l2_modulator *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_modulator`.
-
Description
===========
@@ -60,7 +60,6 @@ context.
To change the radio frequency the
:ref:`VIDIOC_S_FREQUENCY <VIDIOC_G_FREQUENCY>` ioctl is available.
-
.. tabularcolumns:: |p{2.9cm}|p{2.9cm}|p{5.8cm}|p{2.9cm}|p{3.0cm}|
.. c:type:: v4l2_modulator
@@ -121,7 +120,6 @@ To change the radio frequency the
Drivers and applications must set the array to zero.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _modulator-txsubchans:
@@ -182,7 +180,6 @@ To change the radio frequency the
- 0x0010
- Enable the RDS encoder for a radio FM transmitter.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-g-output.rst b/Documentation/userspace-api/media/v4l/vidioc-g-output.rst
index 0afc55c67840..3138c4cc8fe3 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-g-output.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-g-output.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_G_OUTPUT:
@@ -11,27 +12,26 @@ Name
VIDIOC_G_OUTPUT - VIDIOC_S_OUTPUT - Query or select the current video output
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_G_OUTPUT, int *argp )
- :name: VIDIOC_G_OUTPUT
+.. c:macro:: VIDIOC_G_OUTPUT
+
+``int ioctl(int fd, VIDIOC_G_OUTPUT, int *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_S_OUTPUT, int *argp )
- :name: VIDIOC_S_OUTPUT
+.. c:macro:: VIDIOC_S_OUTPUT
+``int ioctl(int fd, VIDIOC_S_OUTPUT, int *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to an integer with output index.
-
Description
===========
@@ -53,7 +53,6 @@ negotiating any other parameters.
Information about video outputs is available using the
:ref:`VIDIOC_ENUMOUTPUT` ioctl.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-g-parm.rst b/Documentation/userspace-api/media/v4l/vidioc-g-parm.rst
index 94f3af279bba..724f7fa7bae1 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-g-parm.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-g-parm.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_G_PARM:
@@ -11,27 +12,26 @@ Name
VIDIOC_G_PARM - VIDIOC_S_PARM - Get or set streaming parameters
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_G_PARM, v4l2_streamparm *argp )
- :name: VIDIOC_G_PARM
+.. c:macro:: VIDIOC_G_PARM
+
+``int ioctl(int fd, VIDIOC_G_PARM, v4l2_streamparm *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_S_PARM, v4l2_streamparm *argp )
- :name: VIDIOC_S_PARM
+.. c:macro:: VIDIOC_S_PARM
+``int ioctl(int fd, VIDIOC_S_PARM, v4l2_streamparm *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_streamparm`.
-
Description
===========
@@ -48,7 +48,7 @@ format, on the other hand, may change the frame interval.
Further these ioctls can be used to determine the number of buffers used
internally by a driver in read/write mode. For implications see the
-section discussing the :ref:`read() <func-read>` function.
+section discussing the :c:func:`read()` function.
To get and set the streaming parameters applications call the
:ref:`VIDIOC_G_PARM <VIDIOC_G_PARM>` and
@@ -56,7 +56,6 @@ To get and set the streaming parameters applications call the
pointer to a struct :c:type:`v4l2_streamparm` which contains a
union holding separate parameters for input and output devices.
-
.. tabularcolumns:: |p{3.5cm}|p{3.5cm}|p{3.5cm}|p{7.0cm}|
.. c:type:: v4l2_streamparm
@@ -89,7 +88,6 @@ union holding separate parameters for input and output devices.
-
-
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
.. c:type:: v4l2_captureparm
@@ -138,7 +136,7 @@ union holding separate parameters for input and output devices.
* - __u32
- ``readbuffers``
- Applications set this field to the desired number of buffers used
- internally by the driver in :ref:`read() <func-read>` mode.
+ internally by the driver in :c:func:`read()` mode.
Drivers return the actual number of buffers. When an application
requests zero buffers, drivers should just return the current
setting rather than the minimum or an error code. For details see
@@ -149,7 +147,6 @@ union holding separate parameters for input and output devices.
the array to zero.
-
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
.. c:type:: v4l2_outputparm
@@ -172,7 +169,7 @@ union holding separate parameters for input and output devices.
* - :cspan:`2`
The field is intended to repeat frames on the driver side in
- :ref:`write() <func-write>` mode (in streaming mode timestamps
+ :c:func:`write()` mode (in streaming mode timestamps
can be used to throttle the output), saving I/O bandwidth.
For stateful encoders (see :ref:`encoder`) this represents the
@@ -199,7 +196,7 @@ union holding separate parameters for input and output devices.
* - __u32
- ``writebuffers``
- Applications set this field to the desired number of buffers used
- internally by the driver in :ref:`write() <func-write>` mode. Drivers
+ internally by the driver in :c:func:`write()` mode. Drivers
return the actual number of buffers. When an application requests
zero buffers, drivers should just return the current setting
rather than the minimum or an error code. For details see
@@ -210,7 +207,6 @@ union holding separate parameters for input and output devices.
the array to zero.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _parm-caps:
@@ -226,7 +222,6 @@ union holding separate parameters for input and output devices.
field.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _parm-flags:
@@ -265,8 +260,7 @@ union holding separate parameters for input and output devices.
- Moving objects in the image might have excessive motion blur.
- - Capture might only work through the :ref:`read() <func-read>` call.
-
+ - Capture might only work through the :c:func:`read()` call.
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-g-priority.rst b/Documentation/userspace-api/media/v4l/vidioc-g-priority.rst
index da3166ac6d08..d72a0c716fca 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-g-priority.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-g-priority.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_G_PRIORITY:
@@ -11,27 +12,26 @@ Name
VIDIOC_G_PRIORITY - VIDIOC_S_PRIORITY - Query or request the access priority associated with a file descriptor
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_G_PRIORITY, enum v4l2_priority *argp )
- :name: VIDIOC_G_PRIORITY
+.. c:macro:: VIDIOC_G_PRIORITY
+
+``int ioctl(int fd, VIDIOC_G_PRIORITY, enum v4l2_priority *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_S_PRIORITY, const enum v4l2_priority *argp )
- :name: VIDIOC_S_PRIORITY
+.. c:macro:: VIDIOC_S_PRIORITY
+``int ioctl(int fd, VIDIOC_S_PRIORITY, const enum v4l2_priority *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to an enum :c:type:`v4l2_priority` type.
-
Description
===========
@@ -43,7 +43,6 @@ To request an access priority applications store the desired priority in
an enum v4l2_priority variable and call :ref:`VIDIOC_S_PRIORITY <VIDIOC_G_PRIORITY>` ioctl
with a pointer to this variable.
-
.. c:type:: v4l2_priority
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
@@ -78,7 +77,6 @@ with a pointer to this variable.
it blocks any other fd from changing device properties. Usually
applications which must not be interrupted, like video recording.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-g-selection.rst b/Documentation/userspace-api/media/v4l/vidioc-g-selection.rst
index cda7a69ea130..9a9e589cce77 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-g-selection.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-g-selection.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_G_SELECTION:
@@ -11,23 +12,22 @@ Name
VIDIOC_G_SELECTION - VIDIOC_S_SELECTION - Get or set one of the selection rectangles
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_G_SELECTION, struct v4l2_selection *argp )
- :name: VIDIOC_G_SELECTION
+.. c:macro:: VIDIOC_G_SELECTION
+``int ioctl(int fd, VIDIOC_G_SELECTION, struct v4l2_selection *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_S_SELECTION, struct v4l2_selection *argp )
- :name: VIDIOC_S_SELECTION
+.. c:macro:: VIDIOC_S_SELECTION
+``int ioctl(int fd, VIDIOC_S_SELECTION, struct v4l2_selection *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_selection`.
@@ -115,7 +115,6 @@ constraints.
Selection targets and flags are documented in
:ref:`v4l2-selections-common`.
-
.. _sel-const-adjust:
.. kernel-figure:: constraints.svg
@@ -128,7 +127,6 @@ Selection targets and flags are documented in
-
.. c:type:: v4l2_selection
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
@@ -168,7 +166,6 @@ Selection targets and flags are documented in
Starting with kernel 4.13 both variations are allowed.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-g-sliced-vbi-cap.rst b/Documentation/userspace-api/media/v4l/vidioc-g-sliced-vbi-cap.rst
index a3a7fb00350f..752f7f5fae73 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-g-sliced-vbi-cap.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-g-sliced-vbi-cap.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_G_SLICED_VBI_CAP:
@@ -11,24 +12,22 @@ Name
VIDIOC_G_SLICED_VBI_CAP - Query sliced VBI capabilities
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_G_SLICED_VBI_CAP, struct v4l2_sliced_vbi_cap *argp )
- :name: VIDIOC_G_SLICED_VBI_CAP
+.. c:macro:: VIDIOC_G_SLICED_VBI_CAP
+``int ioctl(int fd, VIDIOC_G_SLICED_VBI_CAP, struct v4l2_sliced_vbi_cap *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_sliced_vbi_cap`.
-
Description
===========
@@ -44,7 +43,6 @@ the sliced VBI API is unsupported or ``type`` is invalid.
The ``type`` field was added, and the ioctl changed from read-only
to write-read, in Linux 2.6.19.
-
.. c:type:: v4l2_sliced_vbi_cap
.. tabularcolumns:: |p{1.2cm}|p{4.2cm}|p{4.1cm}|p{4.0cm}|p{4.0cm}|
@@ -120,7 +118,6 @@ the sliced VBI API is unsupported or ``type`` is invalid.
See also :ref:`vbi-525` and :ref:`vbi-625`.
-
.. raw:: latex
\scriptsize
@@ -183,7 +180,6 @@ the sliced VBI API is unsupported or ``type`` is invalid.
\normalsize
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-g-std.rst b/Documentation/userspace-api/media/v4l/vidioc-g-std.rst
index 8a659a873528..da91fe07d9e0 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-g-std.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-g-std.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_G_STD:
@@ -11,33 +12,34 @@ Name
VIDIOC_G_STD - VIDIOC_S_STD - VIDIOC_SUBDEV_G_STD - VIDIOC_SUBDEV_S_STD - Query or select the video standard of the current input
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_G_STD, v4l2_std_id *argp )
- :name: VIDIOC_G_STD
+.. c:macro:: VIDIOC_G_STD
+
+``int ioctl(int fd, VIDIOC_G_STD, v4l2_std_id *argp)``
+
+.. c:macro:: VIDIOC_S_STD
-.. c:function:: int ioctl( int fd, VIDIOC_S_STD, const v4l2_std_id *argp )
- :name: VIDIOC_S_STD
+``int ioctl(int fd, VIDIOC_S_STD, const v4l2_std_id *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_G_STD, v4l2_std_id *argp )
- :name: VIDIOC_SUBDEV_G_STD
+.. c:macro:: VIDIOC_SUBDEV_G_STD
-.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_S_STD, const v4l2_std_id *argp )
- :name: VIDIOC_SUBDEV_S_STD
+``int ioctl(int fd, VIDIOC_SUBDEV_G_STD, v4l2_std_id *argp)``
+.. c:macro:: VIDIOC_SUBDEV_S_STD
+
+``int ioctl(int fd, VIDIOC_SUBDEV_S_STD, const v4l2_std_id *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to :c:type:`v4l2_std_id`.
-
Description
===========
diff --git a/Documentation/userspace-api/media/v4l/vidioc-g-tuner.rst b/Documentation/userspace-api/media/v4l/vidioc-g-tuner.rst
index e3ba5b18a357..116e66c01556 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-g-tuner.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-g-tuner.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_G_TUNER:
@@ -11,27 +12,26 @@ Name
VIDIOC_G_TUNER - VIDIOC_S_TUNER - Get or set tuner attributes
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_G_TUNER, struct v4l2_tuner *argp )
- :name: VIDIOC_G_TUNER
+.. c:macro:: VIDIOC_G_TUNER
+
+``int ioctl(int fd, VIDIOC_G_TUNER, struct v4l2_tuner *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_S_TUNER, const struct v4l2_tuner *argp )
- :name: VIDIOC_S_TUNER
+.. c:macro:: VIDIOC_S_TUNER
+``int ioctl(int fd, VIDIOC_S_TUNER, const struct v4l2_tuner *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_tuner`.
-
Description
===========
@@ -59,7 +59,6 @@ to zero. The term 'tuner' means SDR receiver in this context.
To change the radio frequency the
:ref:`VIDIOC_S_FREQUENCY <VIDIOC_G_FREQUENCY>` ioctl is available.
-
.. tabularcolumns:: |p{1.3cm}|p{3.0cm}|p{6.6cm}|p{6.6cm}|
.. c:type:: v4l2_tuner
@@ -183,7 +182,6 @@ To change the radio frequency the
Drivers and applications must set the array to zero.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. c:type:: v4l2_tuner_type
@@ -207,7 +205,6 @@ To change the radio frequency the
- 5
- Tuner controls the RF part of a Software Digital Radio (SDR)
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _tuner-capability:
@@ -299,7 +296,6 @@ To change the radio frequency the
instead of 62.5 kHz.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _tuner-rxsubchans:
@@ -338,7 +334,6 @@ To change the radio frequency the
- The tuner receives an RDS channel.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _tuner-audmode:
diff --git a/Documentation/userspace-api/media/v4l/vidioc-log-status.rst b/Documentation/userspace-api/media/v4l/vidioc-log-status.rst
index 74b06dc68109..c218747be762 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-log-status.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-log-status.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_LOG_STATUS:
@@ -11,20 +12,18 @@ Name
VIDIOC_LOG_STATUS - Log driver status information
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_LOG_STATUS)
- :name: VIDIOC_LOG_STATUS
+.. c:macro:: VIDIOC_LOG_STATUS
+``int ioctl(int fd, VIDIOC_LOG_STATUS)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
-
+ File descriptor returned by :c:func:`open()`.
Description
===========
@@ -40,7 +39,6 @@ Mismatches may give an indication where the problem is.
This ioctl is optional and not all drivers support it. It was introduced
in Linux 2.6.15.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-overlay.rst b/Documentation/userspace-api/media/v4l/vidioc-overlay.rst
index 8553fc7a6d8b..f2efaaba24c0 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-overlay.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-overlay.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_OVERLAY:
@@ -11,24 +12,22 @@ Name
VIDIOC_OVERLAY - Start or stop video overlay
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_OVERLAY, const int *argp )
- :name: VIDIOC_OVERLAY
+.. c:macro:: VIDIOC_OVERLAY
+``int ioctl(int fd, VIDIOC_OVERLAY, const int *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to an integer.
-
Description
===========
@@ -41,7 +40,6 @@ Drivers do not support :ref:`VIDIOC_STREAMON` or
:ref:`VIDIOC_STREAMOFF <VIDIOC_STREAMON>` with
``V4L2_BUF_TYPE_VIDEO_OVERLAY``.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-prepare-buf.rst b/Documentation/userspace-api/media/v4l/vidioc-prepare-buf.rst
index df003e5a65ac..45bb1eab2c2d 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-prepare-buf.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-prepare-buf.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_PREPARE_BUF:
@@ -11,24 +12,22 @@ Name
VIDIOC_PREPARE_BUF - Prepare a buffer for I/O
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_PREPARE_BUF, struct v4l2_buffer *argp )
- :name: VIDIOC_PREPARE_BUF
+.. c:macro:: VIDIOC_PREPARE_BUF
+``int ioctl(int fd, VIDIOC_PREPARE_BUF, struct v4l2_buffer *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_buffer`.
-
Description
===========
@@ -41,7 +40,6 @@ in advance saves time during the actual I/O.
The struct :c:type:`v4l2_buffer` structure is specified in
:ref:`buffer`.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-qbuf.rst b/Documentation/userspace-api/media/v4l/vidioc-qbuf.rst
index cd920d0b6adf..fbf8c5962d8a 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-qbuf.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-qbuf.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_QBUF:
@@ -11,27 +12,26 @@ Name
VIDIOC_QBUF - VIDIOC_DQBUF - Exchange a buffer with the driver
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_QBUF, struct v4l2_buffer *argp )
- :name: VIDIOC_QBUF
+.. c:macro:: VIDIOC_QBUF
+
+``int ioctl(int fd, VIDIOC_QBUF, struct v4l2_buffer *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_DQBUF, struct v4l2_buffer *argp )
- :name: VIDIOC_DQBUF
+.. c:macro:: VIDIOC_DQBUF
+``int ioctl(int fd, VIDIOC_DQBUF, struct v4l2_buffer *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_buffer`.
-
Description
===========
@@ -142,13 +142,12 @@ API is used the ``m.fd`` fields of the passed array of struct
By default ``VIDIOC_DQBUF`` blocks when no buffer is in the outgoing
queue. When the ``O_NONBLOCK`` flag was given to the
-:ref:`open() <func-open>` function, ``VIDIOC_DQBUF`` returns
+:c:func:`open()` function, ``VIDIOC_DQBUF`` returns
immediately with an ``EAGAIN`` error code when no buffer is available.
The struct :c:type:`v4l2_buffer` structure is specified in
:ref:`buffer`.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-query-dv-timings.rst b/Documentation/userspace-api/media/v4l/vidioc-query-dv-timings.rst
index 6942e7e76897..5afdc4b4dc2d 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-query-dv-timings.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-query-dv-timings.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_QUERY_DV_TIMINGS:
@@ -11,27 +12,26 @@ Name
VIDIOC_QUERY_DV_TIMINGS - VIDIOC_SUBDEV_QUERY_DV_TIMINGS - Sense the DV preset received by the current input
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_QUERY_DV_TIMINGS, struct v4l2_dv_timings *argp )
- :name: VIDIOC_QUERY_DV_TIMINGS
+.. c:macro:: VIDIOC_QUERY_DV_TIMINGS
+
+``int ioctl(int fd, VIDIOC_QUERY_DV_TIMINGS, struct v4l2_dv_timings *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_QUERY_DV_TIMINGS, struct v4l2_dv_timings *argp )
- :name: VIDIOC_SUBDEV_QUERY_DV_TIMINGS
+.. c:macro:: VIDIOC_SUBDEV_QUERY_DV_TIMINGS
+``int ioctl(int fd, VIDIOC_SUBDEV_QUERY_DV_TIMINGS, struct v4l2_dv_timings *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_dv_timings`.
-
Description
===========
@@ -65,7 +65,6 @@ and returns ``ERANGE``. In that case the application can call
found timings with the hardware's capabilities in order to give more
precise feedback to the user.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-querybuf.rst b/Documentation/userspace-api/media/v4l/vidioc-querybuf.rst
index 1d7ecf2697af..6c615e893866 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-querybuf.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-querybuf.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_QUERYBUF:
@@ -11,24 +12,22 @@ Name
VIDIOC_QUERYBUF - Query the status of a buffer
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_QUERYBUF, struct v4l2_buffer *argp )
- :name: VIDIOC_QUERYBUF
+.. c:macro:: VIDIOC_QUERYBUF
+``int ioctl(int fd, VIDIOC_QUERYBUF, struct v4l2_buffer *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_buffer`.
-
Description
===========
@@ -67,7 +66,6 @@ flags, they are meaningless in this context.
The struct :c:type:`v4l2_buffer` structure is specified in
:ref:`buffer`.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-querycap.rst b/Documentation/userspace-api/media/v4l/vidioc-querycap.rst
index 80b7b79561f5..b512b1fbf9a3 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-querycap.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-querycap.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_QUERYCAP:
@@ -11,24 +12,22 @@ Name
VIDIOC_QUERYCAP - Query device capabilities
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_QUERYCAP, struct v4l2_capability *argp )
- :name: VIDIOC_QUERYCAP
+.. c:macro:: VIDIOC_QUERYCAP
+``int ioctl(int fd, VIDIOC_QUERYCAP, struct v4l2_capability *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_capability`.
-
Description
===========
@@ -39,7 +38,6 @@ pointer to a struct :c:type:`v4l2_capability` which is
filled by the driver. When the driver is not compatible with this
specification the ioctl returns an ``EINVAL`` error code.
-
.. tabularcolumns:: |p{1.5cm}|p{2.5cm}|p{13cm}|
.. c:type:: v4l2_capability
@@ -132,7 +130,6 @@ specification the ioctl returns an ``EINVAL`` error code.
zero.
-
.. tabularcolumns:: |p{6.1cm}|p{2.2cm}|p{8.7cm}|
.. _device-capabilities:
@@ -243,8 +240,8 @@ specification the ioctl returns an ``EINVAL`` error code.
- The device supports the :ref:`metadata` capture interface.
* - ``V4L2_CAP_READWRITE``
- 0x01000000
- - The device supports the :ref:`read() <rw>` and/or
- :ref:`write() <rw>` I/O methods.
+ - The device supports the :c:func:`read()` and/or
+ :c:func:`write()` I/O methods.
* - ``V4L2_CAP_ASYNCIO``
- 0x02000000
- The device supports the :ref:`asynchronous <async>` I/O methods.
@@ -269,7 +266,6 @@ specification the ioctl returns an ``EINVAL`` error code.
only appear in the ``capabilities`` field and never in the
``device_caps`` field.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst b/Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst
index 559ad849f7b9..9b8716f90f12 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_QUERYCTRL:
@@ -11,31 +12,29 @@ Name
VIDIOC_QUERYCTRL - VIDIOC_QUERY_EXT_CTRL - VIDIOC_QUERYMENU - Enumerate controls and menu control items
-
Synopsis
========
-.. c:function:: int ioctl( int fd, int VIDIOC_QUERYCTRL, struct v4l2_queryctrl *argp )
- :name: VIDIOC_QUERYCTRL
+``int ioctl(int fd, int VIDIOC_QUERYCTRL, struct v4l2_queryctrl *argp)``
+
+.. c:macro:: VIDIOC_QUERY_EXT_CTRL
-.. c:function:: int ioctl( int fd, VIDIOC_QUERY_EXT_CTRL, struct v4l2_query_ext_ctrl *argp )
- :name: VIDIOC_QUERY_EXT_CTRL
+``int ioctl(int fd, VIDIOC_QUERY_EXT_CTRL, struct v4l2_query_ext_ctrl *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_QUERYMENU, struct v4l2_querymenu *argp )
- :name: VIDIOC_QUERYMENU
+.. c:macro:: VIDIOC_QUERYMENU
+``int ioctl(int fd, VIDIOC_QUERYMENU, struct v4l2_querymenu *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_queryctrl`, :c:type:`v4l2_query_ext_ctrl`
or :c:type:`v4l2_querymenu` (depending on the ioctl).
-
Description
===========
@@ -95,7 +94,6 @@ inclusive.
See also the examples in :ref:`control`.
-
.. tabularcolumns:: |p{1.2cm}|p{3.6cm}|p{12.7cm}|
.. _v4l2-queryctrl:
@@ -174,7 +172,6 @@ See also the examples in :ref:`control`.
zero.
-
.. tabularcolumns:: |p{1.2cm}|p{5.0cm}|p{11.3cm}|
.. _v4l2-query-ext-ctrl:
@@ -275,7 +272,6 @@ See also the examples in :ref:`control`.
the array to zero.
-
.. tabularcolumns:: |p{1.2cm}|p{1.0cm}|p{1.7cm}|p{13.0cm}|
.. _v4l2-querymenu:
@@ -311,7 +307,6 @@ See also the examples in :ref:`control`.
zero.
-
.. tabularcolumns:: |p{5.8cm}|p{1.4cm}|p{1.0cm}|p{1.4cm}|p{6.9cm}|
.. c:type:: v4l2_ctrl_type
@@ -582,7 +577,6 @@ See also the examples in :ref:`control`.
streaming is in progress since most drivers do not support changing
the format in that case.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-querystd.rst b/Documentation/userspace-api/media/v4l/vidioc-querystd.rst
index b043ec48cf9d..4a88287d8f61 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-querystd.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-querystd.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_QUERYSTD:
@@ -11,27 +12,26 @@ Name
VIDIOC_QUERYSTD - VIDIOC_SUBDEV_QUERYSTD - Sense the video standard received by the current input
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_QUERYSTD, v4l2_std_id *argp )
- :name: VIDIOC_QUERYSTD
+.. c:macro:: VIDIOC_QUERYSTD
+
+``int ioctl(int fd, VIDIOC_QUERYSTD, v4l2_std_id *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_QUERYSTD, v4l2_std_id *argp )
- :name: VIDIOC_SUBDEV_QUERYSTD
+.. c:macro:: VIDIOC_SUBDEV_QUERYSTD
+``int ioctl(int fd, VIDIOC_SUBDEV_QUERYSTD, v4l2_std_id *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to :c:type:`v4l2_std_id`.
-
Description
===========
@@ -58,7 +58,6 @@ or output.
standard is valid they will have to stop streaming, set the new
standard, allocate new buffers and start streaming again.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-reqbufs.rst b/Documentation/userspace-api/media/v4l/vidioc-reqbufs.rst
index afc35cd7b614..c1c88e00b106 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-reqbufs.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-reqbufs.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_REQBUFS:
@@ -11,19 +12,18 @@ Name
VIDIOC_REQBUFS - Initiate Memory Mapping, User Pointer I/O or DMA buffer I/O
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_REQBUFS, struct v4l2_requestbuffers *argp )
- :name: VIDIOC_REQBUFS
+.. c:macro:: VIDIOC_REQBUFS
+``int ioctl(int fd, VIDIOC_REQBUFS, struct v4l2_requestbuffers *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_requestbuffers`.
@@ -69,7 +69,6 @@ fds are closed. A ``count`` value of zero frees or orphans all buffers, after
aborting or finishing any DMA in progress, an implicit
:ref:`VIDIOC_STREAMOFF <VIDIOC_STREAMON>`.
-
.. c:type:: v4l2_requestbuffers
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
@@ -158,7 +157,6 @@ aborting or finishing any DMA in progress, an implicit
:ref:`V4L2_BUF_FLAG_NO_CACHE_INVALIDATE <V4L2-BUF-FLAG-NO-CACHE-INVALIDATE>` and
:ref:`V4L2_BUF_FLAG_NO_CACHE_CLEAN <V4L2-BUF-FLAG-NO-CACHE-CLEAN>`.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-s-hw-freq-seek.rst b/Documentation/userspace-api/media/v4l/vidioc-s-hw-freq-seek.rst
index fb09ea31cad7..1948f31c2dbc 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-s-hw-freq-seek.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-s-hw-freq-seek.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_S_HW_FREQ_SEEK:
@@ -11,24 +12,22 @@ Name
VIDIOC_S_HW_FREQ_SEEK - Perform a hardware frequency seek
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_S_HW_FREQ_SEEK, struct v4l2_hw_freq_seek *argp )
- :name: VIDIOC_S_HW_FREQ_SEEK
+.. c:macro:: VIDIOC_S_HW_FREQ_SEEK
+``int ioctl(int fd, VIDIOC_S_HW_FREQ_SEEK, struct v4l2_hw_freq_seek *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_hw_freq_seek`.
-
Description
===========
@@ -59,7 +58,6 @@ set.
If this ioctl is called from a non-blocking filehandle, then ``EAGAIN``
error code is returned and no seek takes place.
-
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
.. c:type:: v4l2_hw_freq_seek
@@ -116,7 +114,6 @@ error code is returned and no seek takes place.
- Reserved for future extensions. Applications must set the array to
zero.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-streamon.rst b/Documentation/userspace-api/media/v4l/vidioc-streamon.rst
index d9623aa7c425..0bc86f06947b 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-streamon.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-streamon.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_STREAMON:
@@ -11,22 +12,22 @@ Name
VIDIOC_STREAMON - VIDIOC_STREAMOFF - Start or stop streaming I/O
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_STREAMON, const int *argp )
- :name: VIDIOC_STREAMON
+.. c:macro:: VIDIOC_STREAMON
+
+``int ioctl(int fd, VIDIOC_STREAMON, const int *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_STREAMOFF, const int *argp )
- :name: VIDIOC_STREAMOFF
+.. c:macro:: VIDIOC_STREAMOFF
+``int ioctl(int fd, VIDIOC_STREAMOFF, const int *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to an integer.
@@ -84,7 +85,6 @@ state as mentioned above.
no notion of starting or stopping "now". Buffer timestamps can be used
to synchronize with other events.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-subdev-enum-frame-interval.rst b/Documentation/userspace-api/media/v4l/vidioc-subdev-enum-frame-interval.rst
index 932e8416f11c..17acf3fd8396 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-subdev-enum-frame-interval.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-subdev-enum-frame-interval.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_SUBDEV_ENUM_FRAME_INTERVAL:
@@ -11,24 +12,22 @@ Name
VIDIOC_SUBDEV_ENUM_FRAME_INTERVAL - Enumerate frame intervals
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_ENUM_FRAME_INTERVAL, struct v4l2_subdev_frame_interval_enum * argp )
- :name: VIDIOC_SUBDEV_ENUM_FRAME_INTERVAL
+.. c:macro:: VIDIOC_SUBDEV_ENUM_FRAME_INTERVAL
+``int ioctl(int fd, VIDIOC_SUBDEV_ENUM_FRAME_INTERVAL, struct v4l2_subdev_frame_interval_enum * argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_subdev_frame_interval_enum`.
-
Description
===========
@@ -97,7 +96,6 @@ multiple pads of the same sub-device is not defined.
- Reserved for future extensions. Applications and drivers must set
the array to zero.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-subdev-enum-frame-size.rst b/Documentation/userspace-api/media/v4l/vidioc-subdev-enum-frame-size.rst
index 3c480573df59..8016fba7023f 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-subdev-enum-frame-size.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-subdev-enum-frame-size.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_SUBDEV_ENUM_FRAME_SIZE:
@@ -11,24 +12,22 @@ Name
VIDIOC_SUBDEV_ENUM_FRAME_SIZE - Enumerate media bus frame sizes
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_ENUM_FRAME_SIZE, struct v4l2_subdev_frame_size_enum * argp )
- :name: VIDIOC_SUBDEV_ENUM_FRAME_SIZE
+.. c:macro:: VIDIOC_SUBDEV_ENUM_FRAME_SIZE
+``int ioctl(int fd, VIDIOC_SUBDEV_ENUM_FRAME_SIZE, struct v4l2_subdev_frame_size_enum * argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_subdev_frame_size_enum`.
-
Description
===========
@@ -62,7 +61,6 @@ current values of V4L2 controls. See
:ref:`VIDIOC_SUBDEV_G_FMT` for more
information about try formats.
-
.. c:type:: v4l2_subdev_frame_size_enum
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
@@ -103,7 +101,6 @@ information about try formats.
- Reserved for future extensions. Applications and drivers must set
the array to zero.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-subdev-enum-mbus-code.rst b/Documentation/userspace-api/media/v4l/vidioc-subdev-enum-mbus-code.rst
index 3b6a8044c391..1fd950e34a0b 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-subdev-enum-mbus-code.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-subdev-enum-mbus-code.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_SUBDEV_ENUM_MBUS_CODE:
@@ -11,24 +12,22 @@ Name
VIDIOC_SUBDEV_ENUM_MBUS_CODE - Enumerate media bus formats
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_ENUM_MBUS_CODE, struct v4l2_subdev_mbus_code_enum * argp )
- :name: VIDIOC_SUBDEV_ENUM_MBUS_CODE
+.. c:macro:: VIDIOC_SUBDEV_ENUM_MBUS_CODE
+``int ioctl(int fd, VIDIOC_SUBDEV_ENUM_MBUS_CODE, struct v4l2_subdev_mbus_code_enum * argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_subdev_mbus_code_enum`.
-
Description
===========
@@ -47,7 +46,6 @@ other pads of the sub-device, as well as on the current active links.
See :ref:`VIDIOC_SUBDEV_G_FMT` for more
information about the try formats.
-
.. c:type:: v4l2_subdev_mbus_code_enum
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
diff --git a/Documentation/userspace-api/media/v4l/vidioc-subdev-g-crop.rst b/Documentation/userspace-api/media/v4l/vidioc-subdev-g-crop.rst
index 45c988b13ba6..2d78b4f5928d 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-subdev-g-crop.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-subdev-g-crop.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_SUBDEV_G_CROP:
@@ -11,27 +12,26 @@ Name
VIDIOC_SUBDEV_G_CROP - VIDIOC_SUBDEV_S_CROP - Get or set the crop rectangle on a subdev pad
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_G_CROP, struct v4l2_subdev_crop *argp )
- :name: VIDIOC_SUBDEV_G_CROP
+.. c:macro:: VIDIOC_SUBDEV_G_CROP
+
+``int ioctl(int fd, VIDIOC_SUBDEV_G_CROP, struct v4l2_subdev_crop *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_S_CROP, const struct v4l2_subdev_crop *argp )
- :name: VIDIOC_SUBDEV_S_CROP
+.. c:macro:: VIDIOC_SUBDEV_S_CROP
+``int ioctl(int fd, VIDIOC_SUBDEV_S_CROP, const struct v4l2_subdev_crop *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_subdev_crop`.
-
Description
===========
@@ -76,7 +76,6 @@ rectangle doesn't match the device capabilities. They must instead
modify the rectangle to match what the hardware can provide. The
modified format should be as close as possible to the original request.
-
.. c:type:: v4l2_subdev_crop
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
@@ -101,7 +100,6 @@ modified format should be as close as possible to the original request.
- Reserved for future extensions. Applications and drivers must set
the array to zero.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-subdev-g-fmt.rst b/Documentation/userspace-api/media/v4l/vidioc-subdev-g-fmt.rst
index 76ce46f53c76..90b9bbfb61dd 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-subdev-g-fmt.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-subdev-g-fmt.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_SUBDEV_G_FMT:
@@ -11,27 +12,26 @@ Name
VIDIOC_SUBDEV_G_FMT - VIDIOC_SUBDEV_S_FMT - Get or set the data format on a subdev pad
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_G_FMT, struct v4l2_subdev_format *argp )
- :name: VIDIOC_SUBDEV_G_FMT
+.. c:macro:: VIDIOC_SUBDEV_G_FMT
+
+``int ioctl(int fd, VIDIOC_SUBDEV_G_FMT, struct v4l2_subdev_format *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_S_FMT, struct v4l2_subdev_format *argp )
- :name: VIDIOC_SUBDEV_S_FMT
+.. c:macro:: VIDIOC_SUBDEV_S_FMT
+``int ioctl(int fd, VIDIOC_SUBDEV_S_FMT, struct v4l2_subdev_format *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_subdev_format`.
-
Description
===========
@@ -81,7 +81,6 @@ doesn't match the device capabilities. They must instead modify the
format to match what the hardware can provide. The modified format
should be as close as possible to the original request.
-
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
.. c:type:: v4l2_subdev_format
@@ -108,7 +107,6 @@ should be as close as possible to the original request.
the array to zero.
-
.. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
.. _v4l2-subdev-format-whence:
@@ -125,7 +123,6 @@ should be as close as possible to the original request.
- 1
- Active formats, applied to the hardware.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-subdev-g-frame-interval.rst b/Documentation/userspace-api/media/v4l/vidioc-subdev-g-frame-interval.rst
index 7e0b177e70aa..3a50f8b2843d 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-subdev-g-frame-interval.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-subdev-g-frame-interval.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_SUBDEV_G_FRAME_INTERVAL:
@@ -11,27 +12,26 @@ Name
VIDIOC_SUBDEV_G_FRAME_INTERVAL - VIDIOC_SUBDEV_S_FRAME_INTERVAL - Get or set the frame interval on a subdev pad
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_G_FRAME_INTERVAL, struct v4l2_subdev_frame_interval *argp )
- :name: VIDIOC_SUBDEV_G_FRAME_INTERVAL
+.. c:macro:: VIDIOC_SUBDEV_G_FRAME_INTERVAL
+
+``int ioctl(int fd, VIDIOC_SUBDEV_G_FRAME_INTERVAL, struct v4l2_subdev_frame_interval *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_S_FRAME_INTERVAL, struct v4l2_subdev_frame_interval *argp )
- :name: VIDIOC_SUBDEV_S_FRAME_INTERVAL
+.. c:macro:: VIDIOC_SUBDEV_S_FRAME_INTERVAL
+``int ioctl(int fd, VIDIOC_SUBDEV_S_FRAME_INTERVAL, struct v4l2_subdev_frame_interval *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_subdev_frame_interval`.
-
Description
===========
@@ -74,7 +74,6 @@ Sub-devices that support the frame interval ioctls should implement them
on a single pad only. Their behaviour when supported on multiple pads of
the same sub-device is not defined.
-
.. c:type:: v4l2_subdev_frame_interval
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
@@ -95,7 +94,6 @@ the same sub-device is not defined.
- Reserved for future extensions. Applications and drivers must set
the array to zero.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-subdev-g-selection.rst b/Documentation/userspace-api/media/v4l/vidioc-subdev-g-selection.rst
index 948903a34d6f..f35b9562df21 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-subdev-g-selection.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-subdev-g-selection.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_SUBDEV_G_SELECTION:
@@ -11,27 +12,26 @@ Name
VIDIOC_SUBDEV_G_SELECTION - VIDIOC_SUBDEV_S_SELECTION - Get or set selection rectangles on a subdev pad
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_G_SELECTION, struct v4l2_subdev_selection *argp )
- :name: VIDIOC_SUBDEV_G_SELECTION
+.. c:macro:: VIDIOC_SUBDEV_G_SELECTION
+
+``int ioctl(int fd, VIDIOC_SUBDEV_G_SELECTION, struct v4l2_subdev_selection *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_S_SELECTION, struct v4l2_subdev_selection *argp )
- :name: VIDIOC_SUBDEV_S_SELECTION
+.. c:macro:: VIDIOC_SUBDEV_S_SELECTION
+``int ioctl(int fd, VIDIOC_SUBDEV_S_SELECTION, struct v4l2_subdev_selection *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_subdev_selection`.
-
Description
===========
@@ -58,7 +58,6 @@ There are two types of selection targets: actual and bounds. The actual
targets are the targets which configure the hardware. The BOUNDS target
will return a rectangle that contain all possible actual rectangles.
-
Discovering supported features
------------------------------
@@ -69,7 +68,6 @@ return ``EINVAL``.
Selection targets and flags are documented in
:ref:`v4l2-selections-common`.
-
.. c:type:: v4l2_subdev_selection
.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}|
@@ -100,7 +98,6 @@ Selection targets and flags are documented in
- Reserved for future extensions. Applications and drivers must set
the array to zero.
-
Return Value
============
diff --git a/Documentation/userspace-api/media/v4l/vidioc-subdev-querycap.rst b/Documentation/userspace-api/media/v4l/vidioc-subdev-querycap.rst
index e806385ba176..949d9775b03d 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-subdev-querycap.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-subdev-querycap.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_SUBDEV_QUERYCAP:
@@ -11,24 +12,22 @@ Name
VIDIOC_SUBDEV_QUERYCAP - Query sub-device capabilities
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_QUERYCAP, struct v4l2_subdev_capability *argp )
- :name: VIDIOC_SUBDEV_QUERYCAP
+.. c:macro:: VIDIOC_SUBDEV_QUERYCAP
+``int ioctl(int fd, VIDIOC_SUBDEV_QUERYCAP, struct v4l2_subdev_capability *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_subdev_capability`.
-
Description
===========
diff --git a/Documentation/userspace-api/media/v4l/vidioc-subscribe-event.rst b/Documentation/userspace-api/media/v4l/vidioc-subscribe-event.rst
index 67827671bbaa..d1ad35164033 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-subscribe-event.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-subscribe-event.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
+.. c:namespace:: V4L
.. _VIDIOC_SUBSCRIBE_EVENT:
.. _VIDIOC_UNSUBSCRIBE_EVENT:
@@ -12,34 +13,32 @@ Name
VIDIOC_SUBSCRIBE_EVENT - VIDIOC_UNSUBSCRIBE_EVENT - Subscribe or unsubscribe event
-
Synopsis
========
-.. c:function:: int ioctl( int fd, VIDIOC_SUBSCRIBE_EVENT, struct v4l2_event_subscription *argp )
- :name: VIDIOC_SUBSCRIBE_EVENT
+.. c:macro:: VIDIOC_SUBSCRIBE_EVENT
+
+``int ioctl(int fd, VIDIOC_SUBSCRIBE_EVENT, struct v4l2_event_subscription *argp)``
-.. c:function:: int ioctl( int fd, VIDIOC_UNSUBSCRIBE_EVENT, struct v4l2_event_subscription *argp )
- :name: VIDIOC_UNSUBSCRIBE_EVENT
+.. c:macro:: VIDIOC_UNSUBSCRIBE_EVENT
+``int ioctl(int fd, VIDIOC_UNSUBSCRIBE_EVENT, struct v4l2_event_subscription *argp)``
Arguments
=========
``fd``
- File descriptor returned by :ref:`open() <func-open>`.
+ File descriptor returned by :c:func:`open()`.
``argp``
Pointer to struct :c:type:`v4l2_event_subscription`.
-
Description
===========
Subscribe or unsubscribe V4L2 event. Subscribed events are dequeued by
using the :ref:`VIDIOC_DQEVENT` ioctl.
-
.. tabularcolumns:: |p{4.6cm}|p{4.4cm}|p{8.7cm}|
.. c:type:: v4l2_event_subscription
@@ -72,7 +71,6 @@ using the :ref:`VIDIOC_DQEVENT` ioctl.
the array to zero.
-
.. tabularcolumns:: |p{6.8cm}|p{2.2cm}|p{8.5cm}|
.. _event-flags:
@@ -107,7 +105,6 @@ using the :ref:`VIDIOC_DQEVENT` ioctl.
Think carefully when you set this flag so you won't get into
situations like that.
-
Return Value
============
diff --git a/Documentation/virt/uml/user_mode_linux_howto_v2.rst b/Documentation/virt/uml/user_mode_linux_howto_v2.rst
index f70e6f5873c6..312e431695d9 100644
--- a/Documentation/virt/uml/user_mode_linux_howto_v2.rst
+++ b/Documentation/virt/uml/user_mode_linux_howto_v2.rst
@@ -679,6 +679,7 @@ Starting UML
We can now run UML.
::
+
# linux mem=2048M umid=TEST \
ubd0=Filesystem.img \
vec0:transport=tap,ifname=tap0,depth=128,gro=1 \
diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst
index dd9f76a4ef29..09e28507f5b2 100644
--- a/Documentation/vm/hmm.rst
+++ b/Documentation/vm/hmm.rst
@@ -360,7 +360,7 @@ between device driver specific code and shared common code:
system memory page, locks the page with ``lock_page()``, and fills in the
``dst`` array entry with::
- dst[i] = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
+ dst[i] = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
Now that the driver knows that this page is being migrated, it can
invalidate device private MMU mappings and copy device private memory
diff --git a/Documentation/vm/ksm.rst b/Documentation/vm/ksm.rst
index d1b7270ad55c..9e37add068e6 100644
--- a/Documentation/vm/ksm.rst
+++ b/Documentation/vm/ksm.rst
@@ -26,7 +26,7 @@ tree.
If a KSM page is shared between less than ``max_page_sharing`` VMAs,
the node of the stable tree that represents such KSM page points to a
-list of :c:type:`struct rmap_item` and the ``page->mapping`` of the
+list of struct rmap_item and the ``page->mapping`` of the
KSM page points to the stable tree node.
When the sharing passes this threshold, KSM adds a second dimension to
diff --git a/Documentation/vm/memory-model.rst b/Documentation/vm/memory-model.rst
index 769449734573..9daadf9faba1 100644
--- a/Documentation/vm/memory-model.rst
+++ b/Documentation/vm/memory-model.rst
@@ -24,7 +24,7 @@ whether it is possible to manually override that default.
although it is still in use by several architectures.
All the memory models track the status of physical page frames using
-:c:type:`struct page` arranged in one or more arrays.
+struct page arranged in one or more arrays.
Regardless of the selected memory model, there exists one-to-one
mapping between the physical page frame number (PFN) and the
@@ -111,7 +111,7 @@ maps for non-volatile memory devices and deferred initialization of
the memory map for larger systems.
The SPARSEMEM model presents the physical memory as a collection of
-sections. A section is represented with :c:type:`struct mem_section`
+sections. A section is represented with struct mem_section
that contains `section_mem_map` that is, logically, a pointer to an
array of struct pages. However, it is stored with some other magic
that aids the sections management. The section size and maximal number
@@ -172,7 +172,7 @@ management.
The virtually mapped memory map allows storing `struct page` objects
for persistent memory devices in pre-allocated storage on those
-devices. This storage is represented with :c:type:`struct vmem_altmap`
+devices. This storage is represented with struct vmem_altmap
that is eventually passed to vmemmap_populate() through a long chain
of function calls. The vmemmap_populate() implementation may use the
`vmem_altmap` along with :c:func:`vmemmap_alloc_block_buf` helper to
diff --git a/MAINTAINERS b/MAINTAINERS
index 0f59b0412953..17ca7c8490a9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1500,7 +1500,7 @@ L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
F: Documentation/devicetree/bindings/arm/actions.yaml
F: Documentation/devicetree/bindings/clock/actions,owl-cmu.txt
-F: Documentation/devicetree/bindings/dma/owl-dma.txt
+F: Documentation/devicetree/bindings/dma/owl-dma.yaml
F: Documentation/devicetree/bindings/i2c/i2c-owl.txt
F: Documentation/devicetree/bindings/interrupt-controller/actions,owl-sirq.yaml
F: Documentation/devicetree/bindings/mmc/owl-mmc.yaml
@@ -4256,7 +4256,6 @@ F: drivers/net/ethernet/cisco/enic/
CISCO VIC LOW LATENCY NIC DRIVER
M: Christian Benvenuti <benve@cisco.com>
M: Nelson Escobar <neescoba@cisco.com>
-M: Parvi Kaustubhi <pkaustub@cisco.com>
S: Supported
F: drivers/infiniband/hw/usnic/
@@ -5232,7 +5231,6 @@ F: kernel/dma/
DMA-BUF HEAPS FRAMEWORK
M: Sumit Semwal <sumit.semwal@linaro.org>
-R: Andrew F. Davis <afd@ti.com>
R: Benjamin Gaignard <benjamin.gaignard@linaro.org>
R: Liam Mark <lmark@codeaurora.org>
R: Laura Abbott <labbott@redhat.com>
@@ -5868,7 +5866,7 @@ L: linux-renesas-soc@vger.kernel.org
S: Supported
T: git git://linuxtv.org/pinchartl/media drm/du/next
F: Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt
-F: Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt
+F: Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml
F: Documentation/devicetree/bindings/display/renesas,du.txt
F: drivers/gpu/drm/rcar-du/
F: drivers/gpu/drm/shmobile/
@@ -6653,13 +6651,6 @@ L: iommu@lists.linux-foundation.org
S: Maintained
F: drivers/iommu/exynos-iommu.c
-EZchip NPS platform support
-M: Vineet Gupta <vgupta@synopsys.com>
-M: Ofer Levi <oferle@nvidia.com>
-S: Supported
-F: arch/arc/boot/dts/eznps.dts
-F: arch/arc/plat-eznps
-
F2FS FILE SYSTEM
M: Jaegeuk Kim <jaegeuk@kernel.org>
M: Chao Yu <yuchao0@huawei.com>
@@ -7000,7 +6991,7 @@ M: Frank Li <Frank.li@nxp.com>
L: linux-arm-kernel@lists.infradead.org
S: Maintained
F: Documentation/admin-guide/perf/imx-ddr.rst
-F: Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt
+F: Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml
F: drivers/perf/fsl_imx8_ddr_perf.c
FREESCALE IMX I2C DRIVER
@@ -7008,7 +6999,7 @@ M: Oleksij Rempel <o.rempel@pengutronix.de>
R: Pengutronix Kernel Team <kernel@pengutronix.de>
L: linux-i2c@vger.kernel.org
S: Maintained
-F: Documentation/devicetree/bindings/i2c/i2c-imx.txt
+F: Documentation/devicetree/bindings/i2c/i2c-imx.yaml
F: drivers/i2c/busses/i2c-imx.c
FREESCALE IMX LPI2C DRIVER
@@ -7016,7 +7007,7 @@ M: Dong Aisheng <aisheng.dong@nxp.com>
L: linux-i2c@vger.kernel.org
L: linux-imx@nxp.com
S: Maintained
-F: Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt
+F: Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml
F: drivers/i2c/busses/i2c-imx-lpi2c.c
FREESCALE QORIQ DPAA ETHERNET DRIVER
@@ -7239,7 +7230,7 @@ FUSE: FILESYSTEM IN USERSPACE
M: Miklos Szeredi <miklos@szeredi.hu>
L: linux-fsdevel@vger.kernel.org
S: Maintained
-W: http://fuse.sourceforge.net/
+W: https://github.com/libfuse/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git
F: Documentation/filesystems/fuse.rst
F: fs/fuse/
@@ -7793,8 +7784,8 @@ F: include/linux/cciss*.h
F: include/uapi/linux/cciss*.h
HFI1 DRIVER
-M: Mike Marciniszyn <mike.marciniszyn@intel.com>
-M: Dennis Dalessandro <dennis.dalessandro@intel.com>
+M: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
+M: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/hw/hfi1
@@ -8675,8 +8666,9 @@ INGENIC JZ47xx SoCs
M: Paul Cercueil <paul@crapouillou.net>
S: Maintained
F: arch/mips/boot/dts/ingenic/
-F: arch/mips/include/asm/mach-jz4740/
-F: arch/mips/jz4740/
+F: arch/mips/generic/board-ingenic.c
+F: arch/mips/include/asm/mach-ingenic/
+F: arch/mips/ingenic/Kconfig
F: drivers/clk/ingenic/
F: drivers/dma/dma-jz4780.c
F: drivers/gpu/drm/ingenic/
@@ -9715,7 +9707,7 @@ F: security/keys/encrypted-keys/
KEYS-TRUSTED
M: James Bottomley <jejb@linux.ibm.com>
-M: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
+M: Jarkko Sakkinen <jarkko@kernel.org>
M: Mimi Zohar <zohar@linux.ibm.com>
L: linux-integrity@vger.kernel.org
L: keyrings@vger.kernel.org
@@ -9727,7 +9719,7 @@ F: security/keys/trusted-keys/
KEYS/KEYRINGS
M: David Howells <dhowells@redhat.com>
-M: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
+M: Jarkko Sakkinen <jarkko@kernel.org>
L: keyrings@vger.kernel.org
S: Maintained
F: Documentation/security/keys/core.rst
@@ -11612,6 +11604,7 @@ M: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
L: linux-mips@vger.kernel.org
S: Supported
F: Documentation/devicetree/bindings/mips/mscc.txt
+F: Documentation/devicetree/bindings/power/reset/ocelot-reset.txt
F: arch/mips/boot/dts/mscc/
F: arch/mips/configs/generic/board-ocelot.config
F: arch/mips/generic/board-ocelot.c
@@ -11676,7 +11669,7 @@ MIPS GENERIC PLATFORM
M: Paul Burton <paulburton@kernel.org>
L: linux-mips@vger.kernel.org
S: Supported
-F: Documentation/devicetree/bindings/power/mti,mips-cpc.txt
+F: Documentation/devicetree/bindings/power/mti,mips-cpc.yaml
F: arch/mips/generic/
F: arch/mips/tools/generic-board-config.sh
@@ -12567,7 +12560,7 @@ NXP SGTL5000 DRIVER
M: Fabio Estevam <festevam@gmail.com>
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
S: Maintained
-F: Documentation/devicetree/bindings/sound/sgtl5000.txt
+F: Documentation/devicetree/bindings/sound/sgtl5000.yaml
F: sound/soc/codecs/sgtl5000*
NXP SJA1105 ETHERNET SWITCH DRIVER
@@ -12998,8 +12991,8 @@ S: Maintained
F: drivers/char/hw_random/optee-rng.c
OPA-VNIC DRIVER
-M: Dennis Dalessandro <dennis.dalessandro@intel.com>
-M: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
+M: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
+M: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/ulp/opa_vnic
@@ -14300,8 +14293,8 @@ F: drivers/firmware/qemu_fw_cfg.c
F: include/uapi/linux/qemu_fw_cfg.h
QIB DRIVER
-M: Dennis Dalessandro <dennis.dalessandro@intel.com>
-M: Mike Marciniszyn <mike.marciniszyn@intel.com>
+M: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
+M: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/hw/qib/
@@ -14726,8 +14719,8 @@ S: Maintained
F: drivers/net/ethernet/rdc/r6040.c
RDMAVT - RDMA verbs software
-M: Dennis Dalessandro <dennis.dalessandro@intel.com>
-M: Mike Marciniszyn <mike.marciniszyn@intel.com>
+M: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
+M: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/sw/rdmavt
@@ -16867,7 +16860,7 @@ SYNOPSYS DESIGNWARE DMAC DRIVER
M: Viresh Kumar <vireshk@kernel.org>
R: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
S: Maintained
-F: Documentation/devicetree/bindings/dma/snps-dma.txt
+F: Documentation/devicetree/bindings/dma/snps,dma-spear1340.yaml
F: drivers/dma/dw/
F: include/dt-bindings/dma/dw-dmac.h
F: include/linux/dma/dw.h
@@ -17398,7 +17391,7 @@ S: Maintained
F: drivers/thermal/ti-soc-thermal/
TI BQ27XXX POWER SUPPLY DRIVER
-R: Andrew F. Davis <afd@ti.com>
+R: Dan Murphy <dmurphy@ti.com>
F: drivers/power/supply/bq27xxx_battery.c
F: drivers/power/supply/bq27xxx_battery_i2c.c
F: include/linux/power/bq27xxx_battery.h
@@ -17672,8 +17665,9 @@ S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev
F: Documentation/RCU/torture.rst
F: kernel/locking/locktorture.c
-F: kernel/rcu/rcuperf.c
+F: kernel/rcu/rcuscale.c
F: kernel/rcu/rcutorture.c
+F: kernel/rcu/refscale.c
F: kernel/torture.c
TOSHIBA ACPI EXTRAS DRIVER
@@ -17717,7 +17711,7 @@ F: drivers/platform/x86/toshiba-wmi.c
TPM DEVICE DRIVER
M: Peter Huewe <peterhuewe@gmx.de>
-M: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
+M: Jarkko Sakkinen <jarkko@kernel.org>
R: Jason Gunthorpe <jgg@ziepe.ca>
L: linux-integrity@vger.kernel.org
S: Maintained
diff --git a/arch/Kconfig b/arch/Kconfig
index 8519d9f42e33..958be0531eb9 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -420,6 +420,13 @@ config MMU_GATHER_NO_GATHER
bool
depends on MMU_GATHER_TABLE_FREE
+config ARCH_WANT_IRQS_OFF_ACTIVATE_MM
+ bool
+ help
+ Temporary select until all architectures can be converted to have
+ irqs disabled over activate_mm. Architectures that do IPI based TLB
+ shootdowns should enable this.
+
config ARCH_HAVE_NMI_SAFE_CMPXCHG
bool
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index ec8bed9e7b75..ee7b01bb7346 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -479,3 +479,4 @@
547 common openat2 sys_openat2
548 common pidfd_getfd sys_pidfd_getfd
549 common faccessat2 sys_faccessat2
+550 common process_madvise sys_process_madvise
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index ba00c4e1e1c2..19f8f2367d6d 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -96,8 +96,6 @@ menu "ARC Platform/SoC/Board"
source "arch/arc/plat-tb10x/Kconfig"
source "arch/arc/plat-axs10x/Kconfig"
-#New platform adds here
-source "arch/arc/plat-eznps/Kconfig"
source "arch/arc/plat-hsdk/Kconfig"
endmenu
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index d00f8b8afd08..0c6bf0d1df7a 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -94,13 +94,8 @@ core-y += arch/arc/boot/dts/
core-y += arch/arc/plat-sim/
core-$(CONFIG_ARC_PLAT_TB10X) += arch/arc/plat-tb10x/
core-$(CONFIG_ARC_PLAT_AXS10X) += arch/arc/plat-axs10x/
-core-$(CONFIG_ARC_PLAT_EZNPS) += arch/arc/plat-eznps/
core-$(CONFIG_ARC_SOC_HSDK) += arch/arc/plat-hsdk/
-ifdef CONFIG_ARC_PLAT_EZNPS
-KBUILD_CPPFLAGS += -I$(srctree)/arch/arc/plat-eznps/include
-endif
-
drivers-$(CONFIG_OPROFILE) += arch/arc/oprofile/
libs-y += arch/arc/lib/ $(LIBGCC)
diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi
index 79ec27c043c1..2a151607b080 100644
--- a/arch/arc/boot/dts/axc001.dtsi
+++ b/arch/arc/boot/dts/axc001.dtsi
@@ -91,7 +91,7 @@
* avoid duplicating the MB dtsi file given that IRQ from
* this intc to cpu intc are different for axs101 and axs103
*/
- mb_intc: dw-apb-ictl@e0012000 {
+ mb_intc: interrupt-controller@e0012000 {
#interrupt-cells = <1>;
compatible = "snps,dw-apb-ictl";
reg = < 0x0 0xe0012000 0x0 0x200 >;
diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi
index ac8e1b463a70..cd1edcf4f95e 100644
--- a/arch/arc/boot/dts/axc003.dtsi
+++ b/arch/arc/boot/dts/axc003.dtsi
@@ -129,7 +129,7 @@
* avoid duplicating the MB dtsi file given that IRQ from
* this intc to cpu intc are different for axs101 and axs103
*/
- mb_intc: dw-apb-ictl@e0012000 {
+ mb_intc: interrupt-controller@e0012000 {
#interrupt-cells = <1>;
compatible = "snps,dw-apb-ictl";
reg = < 0x0 0xe0012000 0x0 0x200 >;
diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi
index 9da21e7fd246..70779386ca79 100644
--- a/arch/arc/boot/dts/axc003_idu.dtsi
+++ b/arch/arc/boot/dts/axc003_idu.dtsi
@@ -135,7 +135,7 @@
* avoid duplicating the MB dtsi file given that IRQ from
* this intc to cpu intc are different for axs101 and axs103
*/
- mb_intc: dw-apb-ictl@e0012000 {
+ mb_intc: interrupt-controller@e0012000 {
#interrupt-cells = <1>;
compatible = "snps,dw-apb-ictl";
reg = < 0x0 0xe0012000 0x0 0x200 >;
diff --git a/arch/arc/boot/dts/eznps.dts b/arch/arc/boot/dts/eznps.dts
deleted file mode 100644
index a7e2e8d8ff06..000000000000
--- a/arch/arc/boot/dts/eznps.dts
+++ /dev/null
@@ -1,84 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright(c) 2015 EZchip Technologies.
- */
-
-/dts-v1/;
-
-/ {
- compatible = "ezchip,arc-nps";
- #address-cells = <1>;
- #size-cells = <1>;
- interrupt-parent = <&intc>;
- present-cpus = "0-1,16-17";
- possible-cpus = "0-4095";
-
- aliases {
- ethernet0 = &gmac0;
- };
-
- chosen {
- bootargs = "earlycon=uart8250,mmio32be,0xf7209000,115200n8 console=ttyS0,115200n8";
- };
-
- memory {
- device_type = "memory";
- reg = <0x80000000 0x20000000>; /* 512M */
- };
-
- clocks {
- sysclk: sysclk {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <83333333>;
- };
- };
-
- soc {
- compatible = "simple-bus";
- #address-cells = <1>;
- #size-cells = <1>;
-
- /* child and parent address space 1:1 mapped */
- ranges;
-
- intc: interrupt-controller {
- compatible = "ezchip,nps400-ic";
- interrupt-controller;
- #interrupt-cells = <1>;
- };
-
- timer0: timer_clkevt {
- compatible = "snps,arc-timer";
- interrupts = <3>;
- clocks = <&sysclk>;
- };
-
- timer1: timer_clksrc {
- compatible = "ezchip,nps400-timer";
- clocks = <&sysclk>;
- clock-names="sysclk";
- };
-
- uart@f7209000 {
- compatible = "snps,dw-apb-uart";
- device_type = "serial";
- reg = <0xf7209000 0x100>;
- interrupts = <6>;
- clocks = <&sysclk>;
- clock-names="baudclk";
- baud = <115200>;
- reg-shift = <2>;
- reg-io-width = <4>;
- native-endian;
- };
-
- gmac0: ethernet@f7470000 {
- compatible = "ezchip,nps-mgt-enet";
- reg = <0xf7470000 0x1940>;
- interrupts = <7>;
- /* Filled in by U-Boot */
- mac-address = [ 00 C0 00 F0 04 03 ];
- };
- };
-};
diff --git a/arch/arc/boot/dts/vdk_axc003.dtsi b/arch/arc/boot/dts/vdk_axc003.dtsi
index f8be7ba8dad4..c21d0eb07bf6 100644
--- a/arch/arc/boot/dts/vdk_axc003.dtsi
+++ b/arch/arc/boot/dts/vdk_axc003.dtsi
@@ -46,7 +46,7 @@
};
- mb_intc: dw-apb-ictl@e0012000 {
+ mb_intc: interrupt-controller@e0012000 {
#interrupt-cells = <1>;
compatible = "snps,dw-apb-ictl";
reg = < 0xe0012000 0x200 >;
diff --git a/arch/arc/boot/dts/vdk_axc003_idu.dtsi b/arch/arc/boot/dts/vdk_axc003_idu.dtsi
index 0afa3e53a4e3..4d348853ac7c 100644
--- a/arch/arc/boot/dts/vdk_axc003_idu.dtsi
+++ b/arch/arc/boot/dts/vdk_axc003_idu.dtsi
@@ -54,7 +54,7 @@
};
- mb_intc: dw-apb-ictl@e0012000 {
+ mb_intc: interrupt-controller@e0012000 {
#interrupt-cells = <1>;
compatible = "snps,dw-apb-ictl";
reg = < 0xe0012000 0x200 >;
diff --git a/arch/arc/configs/nps_defconfig b/arch/arc/configs/nps_defconfig
deleted file mode 100644
index f7a978dfdf1d..000000000000
--- a/arch/arc/configs/nps_defconfig
+++ /dev/null
@@ -1,80 +0,0 @@
-# CONFIG_LOCALVERSION_AUTO is not set
-# CONFIG_SWAP is not set
-CONFIG_SYSVIPC=y
-CONFIG_NO_HZ_IDLE=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
-# CONFIG_EPOLL is not set
-# CONFIG_SIGNALFD is not set
-# CONFIG_TIMERFD is not set
-# CONFIG_EVENTFD is not set
-# CONFIG_AIO is not set
-CONFIG_EMBEDDED=y
-CONFIG_PERF_EVENTS=y
-# CONFIG_COMPAT_BRK is not set
-CONFIG_ISA_ARCOMPACT=y
-CONFIG_KPROBES=y
-CONFIG_MODULES=y
-CONFIG_MODULE_FORCE_LOAD=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_ARC_PLAT_EZNPS=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=4096
-CONFIG_ARC_CACHE_LINE_SHIFT=5
-# CONFIG_ARC_CACHE_PAGES is not set
-# CONFIG_ARC_HAS_LLSC is not set
-CONFIG_ARC_KVADDR_SIZE=402
-CONFIG_ARC_EMUL_UNALIGNED=y
-CONFIG_PREEMPT=y
-CONFIG_NET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_PNP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_DIAG is not set
-# CONFIG_IPV6 is not set
-# CONFIG_WIRELESS is not set
-CONFIG_DEVTMPFS=y
-CONFIG_DEVTMPFS_MOUNT=y
-# CONFIG_PREVENT_FIRMWARE_BUILD is not set
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_COUNT=1
-CONFIG_BLK_DEV_RAM_SIZE=2048
-CONFIG_NETDEVICES=y
-CONFIG_NETCONSOLE=y
-# CONFIG_NET_VENDOR_BROADCOM is not set
-# CONFIG_NET_VENDOR_MICREL is not set
-# CONFIG_NET_VENDOR_STMICRO is not set
-# CONFIG_WLAN is not set
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_LEGACY_PTYS is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_NR_UARTS=1
-CONFIG_SERIAL_8250_RUNTIME_UARTS=1
-CONFIG_SERIAL_8250_DW=y
-CONFIG_SERIAL_OF_PLATFORM=y
-# CONFIG_HW_RANDOM is not set
-# CONFIG_HWMON is not set
-# CONFIG_USB_SUPPORT is not set
-# CONFIG_DNOTIFY is not set
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-# CONFIG_MISC_FILESYSTEMS is not set
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3_ACL=y
-CONFIG_ROOT_NFS=y
-CONFIG_DEBUG_INFO=y
-# CONFIG_ENABLE_MUST_CHECK is not set
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_MEMORY_INIT=y
-CONFIG_ENABLE_DEFAULT_TRACERS=y
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index c614857eb209..5afc79c9b2f5 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -14,8 +14,6 @@
#include <asm/barrier.h>
#include <asm/smp.h>
-#ifndef CONFIG_ARC_PLAT_EZNPS
-
#define atomic_read(v) READ_ONCE((v)->counter)
#ifdef CONFIG_ARC_HAS_LLSC
@@ -45,7 +43,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
\
/* \
* Explicit full memory barrier needed before/after as \
- * LLOCK/SCOND thmeselves don't provide any such semantics \
+ * LLOCK/SCOND themselves don't provide any such semantics \
*/ \
smp_mb(); \
\
@@ -71,7 +69,7 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \
\
/* \
* Explicit full memory barrier needed before/after as \
- * LLOCK/SCOND thmeselves don't provide any such semantics \
+ * LLOCK/SCOND themselves don't provide any such semantics \
*/ \
smp_mb(); \
\
@@ -195,108 +193,6 @@ ATOMIC_OPS(andnot, &= ~, bic)
ATOMIC_OPS(or, |=, or)
ATOMIC_OPS(xor, ^=, xor)
-#else /* CONFIG_ARC_PLAT_EZNPS */
-
-static inline int atomic_read(const atomic_t *v)
-{
- int temp;
-
- __asm__ __volatile__(
- " ld.di %0, [%1]"
- : "=r"(temp)
- : "r"(&v->counter)
- : "memory");
- return temp;
-}
-
-static inline void atomic_set(atomic_t *v, int i)
-{
- __asm__ __volatile__(
- " st.di %0,[%1]"
- :
- : "r"(i), "r"(&v->counter)
- : "memory");
-}
-
-#define ATOMIC_OP(op, c_op, asm_op) \
-static inline void atomic_##op(int i, atomic_t *v) \
-{ \
- __asm__ __volatile__( \
- " mov r2, %0\n" \
- " mov r3, %1\n" \
- " .word %2\n" \
- : \
- : "r"(i), "r"(&v->counter), "i"(asm_op) \
- : "r2", "r3", "memory"); \
-} \
-
-#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
-static inline int atomic_##op##_return(int i, atomic_t *v) \
-{ \
- unsigned int temp = i; \
- \
- /* Explicit full memory barrier needed before/after */ \
- smp_mb(); \
- \
- __asm__ __volatile__( \
- " mov r2, %0\n" \
- " mov r3, %1\n" \
- " .word %2\n" \
- " mov %0, r2" \
- : "+r"(temp) \
- : "r"(&v->counter), "i"(asm_op) \
- : "r2", "r3", "memory"); \
- \
- smp_mb(); \
- \
- temp c_op i; \
- \
- return temp; \
-}
-
-#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
-static inline int atomic_fetch_##op(int i, atomic_t *v) \
-{ \
- unsigned int temp = i; \
- \
- /* Explicit full memory barrier needed before/after */ \
- smp_mb(); \
- \
- __asm__ __volatile__( \
- " mov r2, %0\n" \
- " mov r3, %1\n" \
- " .word %2\n" \
- " mov %0, r2" \
- : "+r"(temp) \
- : "r"(&v->counter), "i"(asm_op) \
- : "r2", "r3", "memory"); \
- \
- smp_mb(); \
- \
- return temp; \
-}
-
-#define ATOMIC_OPS(op, c_op, asm_op) \
- ATOMIC_OP(op, c_op, asm_op) \
- ATOMIC_OP_RETURN(op, c_op, asm_op) \
- ATOMIC_FETCH_OP(op, c_op, asm_op)
-
-ATOMIC_OPS(add, +=, CTOP_INST_AADD_DI_R2_R2_R3)
-#define atomic_sub(i, v) atomic_add(-(i), (v))
-#define atomic_sub_return(i, v) atomic_add_return(-(i), (v))
-#define atomic_fetch_sub(i, v) atomic_fetch_add(-(i), (v))
-
-#undef ATOMIC_OPS
-#define ATOMIC_OPS(op, c_op, asm_op) \
- ATOMIC_OP(op, c_op, asm_op) \
- ATOMIC_FETCH_OP(op, c_op, asm_op)
-
-ATOMIC_OPS(and, &=, CTOP_INST_AAND_DI_R2_R2_R3)
-ATOMIC_OPS(or, |=, CTOP_INST_AOR_DI_R2_R2_R3)
-ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
-
-#endif /* CONFIG_ARC_PLAT_EZNPS */
-
#undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
diff --git a/arch/arc/include/asm/barrier.h b/arch/arc/include/asm/barrier.h
index 7823811e7cf5..4637de9e02fa 100644
--- a/arch/arc/include/asm/barrier.h
+++ b/arch/arc/include/asm/barrier.h
@@ -27,7 +27,7 @@
#define rmb() asm volatile("dmb 1\n" : : : "memory")
#define wmb() asm volatile("dmb 2\n" : : : "memory")
-#elif !defined(CONFIG_ARC_PLAT_EZNPS) /* CONFIG_ISA_ARCOMPACT */
+#else
/*
* ARCompact based cores (ARC700) only have SYNC instruction which is super
@@ -37,13 +37,6 @@
#define mb() asm volatile("sync\n" : : : "memory")
-#else /* CONFIG_ARC_PLAT_EZNPS */
-
-#include <plat/ctop.h>
-
-#define mb() asm volatile (".word %0" : : "i"(CTOP_INST_SCHD_RW) : "memory")
-#define rmb() asm volatile (".word %0" : : "i"(CTOP_INST_SCHD_RD) : "memory")
-
#endif
#include <asm-generic/barrier.h>
diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index 50eb3f64a77c..c6606f4d20d6 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -85,7 +85,7 @@ static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *
return (old & (1 << nr)) != 0; \
}
-#elif !defined(CONFIG_ARC_PLAT_EZNPS)
+#else /* !CONFIG_ARC_HAS_LLSC */
/*
* Non hardware assisted Atomic-R-M-W
@@ -136,55 +136,7 @@ static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *
return (old & (1UL << (nr & 0x1f))) != 0; \
}
-#else /* CONFIG_ARC_PLAT_EZNPS */
-
-#define BIT_OP(op, c_op, asm_op) \
-static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
-{ \
- m += nr >> 5; \
- \
- nr = (1UL << (nr & 0x1f)); \
- if (asm_op == CTOP_INST_AAND_DI_R2_R2_R3) \
- nr = ~nr; \
- \
- __asm__ __volatile__( \
- " mov r2, %0\n" \
- " mov r3, %1\n" \
- " .word %2\n" \
- : \
- : "r"(nr), "r"(m), "i"(asm_op) \
- : "r2", "r3", "memory"); \
-}
-
-#define TEST_N_BIT_OP(op, c_op, asm_op) \
-static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
-{ \
- unsigned long old; \
- \
- m += nr >> 5; \
- \
- nr = old = (1UL << (nr & 0x1f)); \
- if (asm_op == CTOP_INST_AAND_DI_R2_R2_R3) \
- old = ~old; \
- \
- /* Explicit full memory barrier needed before/after */ \
- smp_mb(); \
- \
- __asm__ __volatile__( \
- " mov r2, %0\n" \
- " mov r3, %1\n" \
- " .word %2\n" \
- " mov %0, r2" \
- : "+r"(old) \
- : "r"(m), "i"(asm_op) \
- : "r2", "r3", "memory"); \
- \
- smp_mb(); \
- \
- return (old & nr) != 0; \
-}
-
-#endif /* CONFIG_ARC_PLAT_EZNPS */
+#endif
/***************************************
* Non atomic variants
@@ -226,15 +178,9 @@ static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long
/* __test_and_set_bit(), __test_and_clear_bit(), __test_and_change_bit() */\
__TEST_N_BIT_OP(op, c_op, asm_op)
-#ifndef CONFIG_ARC_PLAT_EZNPS
BIT_OPS(set, |, bset)
BIT_OPS(clear, & ~, bclr)
BIT_OPS(change, ^, bxor)
-#else
-BIT_OPS(set, |, CTOP_INST_AOR_DI_R2_R2_R3)
-BIT_OPS(clear, & ~, CTOP_INST_AAND_DI_R2_R2_R3)
-BIT_OPS(change, ^, CTOP_INST_AXOR_DI_R2_R2_R3)
-#endif
/*
* This routine doesn't need to be atomic.
diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h
index c11398160240..9b87e162e539 100644
--- a/arch/arc/include/asm/cmpxchg.h
+++ b/arch/arc/include/asm/cmpxchg.h
@@ -20,7 +20,7 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
/*
* Explicit full memory barrier needed before/after as
- * LLOCK/SCOND thmeselves don't provide any such semantics
+ * LLOCK/SCOND themselves don't provide any such semantics
*/
smp_mb();
@@ -41,7 +41,7 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
return prev;
}
-#elif !defined(CONFIG_ARC_PLAT_EZNPS)
+#else /* !CONFIG_ARC_HAS_LLSC */
static inline unsigned long
__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
@@ -61,33 +61,7 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
return prev;
}
-#else /* CONFIG_ARC_PLAT_EZNPS */
-
-static inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
-{
- /*
- * Explicit full memory barrier needed before/after
- */
- smp_mb();
-
- write_aux_reg(CTOP_AUX_GPA1, expected);
-
- __asm__ __volatile__(
- " mov r2, %0\n"
- " mov r3, %1\n"
- " .word %2\n"
- " mov %0, r2"
- : "+r"(new)
- : "r"(ptr), "i"(CTOP_INST_EXC_DI_R2_R2_R3)
- : "r2", "r3", "memory");
-
- smp_mb();
-
- return new;
-}
-
-#endif /* CONFIG_ARC_HAS_LLSC */
+#endif
#define cmpxchg(ptr, o, n) ({ \
(typeof(*(ptr)))__cmpxchg((ptr), \
@@ -104,8 +78,6 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
-#ifndef CONFIG_ARC_PLAT_EZNPS
-
/*
* xchg (reg with memory) based on "Native atomic" EX insn
*/
@@ -168,44 +140,6 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
#endif
-#else /* CONFIG_ARC_PLAT_EZNPS */
-
-static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
- int size)
-{
- extern unsigned long __xchg_bad_pointer(void);
-
- switch (size) {
- case 4:
- /*
- * Explicit full memory barrier needed before/after
- */
- smp_mb();
-
- __asm__ __volatile__(
- " mov r2, %0\n"
- " mov r3, %1\n"
- " .word %2\n"
- " mov %0, r2\n"
- : "+r"(val)
- : "r"(ptr), "i"(CTOP_INST_XEX_DI_R2_R2_R3)
- : "r2", "r3", "memory");
-
- smp_mb();
-
- return val;
- }
- return __xchg_bad_pointer();
-}
-
-#define xchg(ptr, with) ({ \
- (typeof(*(ptr)))__xchg((unsigned long)(with), \
- (ptr), \
- sizeof(*(ptr))); \
-})
-
-#endif /* CONFIG_ARC_PLAT_EZNPS */
-
/*
* "atomic" variant of xchg()
* REQ: It needs to follow the same serialization rules as other atomic_xxx()
diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h
index c3aa775878dc..6dbf5cecc8cc 100644
--- a/arch/arc/include/asm/entry-compact.h
+++ b/arch/arc/include/asm/entry-compact.h
@@ -33,10 +33,6 @@
#include <asm/irqflags-compact.h>
#include <asm/thread_info.h> /* For THREAD_SIZE */
-#ifdef CONFIG_ARC_PLAT_EZNPS
-#include <plat/ctop.h>
-#endif
-
/*--------------------------------------------------------------
* Switch to Kernel Mode stack if SP points to User Mode stack
*
@@ -189,12 +185,6 @@
PUSHAX lp_start
PUSHAX erbta
-#ifdef CONFIG_ARC_PLAT_EZNPS
- .word CTOP_INST_SCHD_RW
- PUSHAX CTOP_AUX_GPA1
- PUSHAX CTOP_AUX_EFLAGS
-#endif
-
lr r10, [ecr]
st r10, [sp, PT_event] /* EV_Trap expects r10 to have ECR */
.endm
@@ -211,11 +201,6 @@
* by hardware and that is not good.
*-------------------------------------------------------------*/
.macro EXCEPTION_EPILOGUE
-#ifdef CONFIG_ARC_PLAT_EZNPS
- .word CTOP_INST_SCHD_RW
- POPAX CTOP_AUX_EFLAGS
- POPAX CTOP_AUX_GPA1
-#endif
POPAX erbta
POPAX lp_start
@@ -278,11 +263,6 @@
PUSHAX lp_start
PUSHAX bta_l\LVL\()
-#ifdef CONFIG_ARC_PLAT_EZNPS
- .word CTOP_INST_SCHD_RW
- PUSHAX CTOP_AUX_GPA1
- PUSHAX CTOP_AUX_EFLAGS
-#endif
.endm
/*--------------------------------------------------------------
@@ -295,11 +275,6 @@
* by hardware and that is not good.
*-------------------------------------------------------------*/
.macro INTERRUPT_EPILOGUE LVL
-#ifdef CONFIG_ARC_PLAT_EZNPS
- .word CTOP_INST_SCHD_RW
- POPAX CTOP_AUX_EFLAGS
- POPAX CTOP_AUX_GPA1
-#endif
POPAX bta_l\LVL\()
POPAX lp_start
@@ -327,13 +302,11 @@
bic \reg, sp, (THREAD_SIZE - 1)
.endm
-#ifndef CONFIG_ARC_PLAT_EZNPS
/* Get CPU-ID of this core */
.macro GET_CPU_ID reg
lr \reg, [identity]
lsr \reg, \reg, 8
bmsk \reg, \reg, 7
.endm
-#endif
#endif /* __ASM_ARC_ENTRY_COMPACT_H */
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 0fcea5bad343..e4031ecd3c8c 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -17,13 +17,6 @@
#include <asm/dsp.h>
#include <asm/fpu.h>
-#ifdef CONFIG_ARC_PLAT_EZNPS
-struct eznps_dp {
- unsigned int eflags;
- unsigned int gpa1;
-};
-#endif
-
/* Arch specific stuff which needs to be saved per task.
* However these items are not so important so as to earn a place in
* struct thread_info
@@ -38,9 +31,6 @@ struct thread_struct {
#ifdef CONFIG_ARC_FPU_SAVE_RESTORE
struct arc_fpu fpu;
#endif
-#ifdef CONFIG_ARC_PLAT_EZNPS
- struct eznps_dp dp;
-#endif
};
#define INIT_THREAD { \
@@ -60,17 +50,8 @@ struct task_struct;
* A lot of busy-wait loops in SMP are based off of non-volatile data otherwise
* get optimised away by gcc
*/
-#ifndef CONFIG_EZNPS_MTM_EXT
-
#define cpu_relax() barrier()
-#else
-
-#define cpu_relax() \
- __asm__ __volatile__ (".word %0" : : "i"(CTOP_INST_SCHD_RW) : "memory")
-
-#endif
-
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->ret)
#define KSTK_ESP(tsk) (task_pt_regs(tsk)->sp)
@@ -118,25 +99,7 @@ extern unsigned int get_wchan(struct task_struct *p);
#define USER_KERNEL_GUTTER (VMALLOC_START - TASK_SIZE)
-#ifdef CONFIG_ARC_PLAT_EZNPS
-/* NPS architecture defines special window of 129M in user address space for
- * special memory areas, when accessing this window the MMU do not use TLB.
- * Instead MMU direct the access to:
- * 0x57f00000:0x57ffffff -- 1M of closely coupled memory (aka CMEM)
- * 0x58000000:0x5fffffff -- 16 huge pages, 8M each, with fixed map (aka FMTs)
- *
- * CMEM - is the fastest memory we got and its size is 16K.
- * FMT - is used to map either to internal/external memory.
- * Internal memory is the second fast memory and its size is 16M
- * External memory is the biggest memory (16G) and also the slowest.
- *
- * STACK_TOP need to be PMD align (21bit) that is why we supply 0x57e00000.
- */
-#define STACK_TOP 0x57e00000
-#else
#define STACK_TOP TASK_SIZE
-#endif
-
#define STACK_TOP_MAX STACK_TOP
/* This decides where the kernel will search for a free chunk of vm
diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h
index 2fdb87addadc..4c3c9be5bd16 100644
--- a/arch/arc/include/asm/ptrace.h
+++ b/arch/arc/include/asm/ptrace.h
@@ -16,11 +16,6 @@
#ifdef CONFIG_ISA_ARCOMPACT
struct pt_regs {
-#ifdef CONFIG_ARC_PLAT_EZNPS
- unsigned long eflags; /* Extended FLAGS */
- unsigned long gpa1; /* General Purpose Aux */
-#endif
-
/* Real registers */
unsigned long bta; /* bta_l1, bta_l2, erbta */
diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h
index 61a97fe70b86..01f85478170d 100644
--- a/arch/arc/include/asm/setup.h
+++ b/arch/arc/include/asm/setup.h
@@ -9,11 +9,7 @@
#include <linux/types.h>
#include <uapi/asm/setup.h>
-#ifdef CONFIG_ARC_PLAT_EZNPS
-#define COMMAND_LINE_SIZE 2048
-#else
#define COMMAND_LINE_SIZE 256
-#endif
/*
* Data structure to map a ID to string
diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h
index 94bbed88e3fc..192871608925 100644
--- a/arch/arc/include/asm/spinlock.h
+++ b/arch/arc/include/asm/spinlock.h
@@ -232,15 +232,9 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
__asm__ __volatile__(
"1: ex %0, [%1] \n"
-#ifdef CONFIG_EZNPS_MTM_EXT
- " .word %3 \n"
-#endif
" breq %0, %2, 1b \n"
: "+&r" (val)
: "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__)
-#ifdef CONFIG_EZNPS_MTM_EXT
- , "i"(CTOP_INST_SCHD_RW)
-#endif
: "memory");
smp_mb();
diff --git a/arch/arc/include/asm/switch_to.h b/arch/arc/include/asm/switch_to.h
index 4a3d67989d19..1f85de8288b1 100644
--- a/arch/arc/include/asm/switch_to.h
+++ b/arch/arc/include/asm/switch_to.h
@@ -12,19 +12,10 @@
#include <asm/dsp-impl.h>
#include <asm/fpu.h>
-#ifdef CONFIG_ARC_PLAT_EZNPS
-extern void dp_save_restore(struct task_struct *p, struct task_struct *n);
-#define ARC_EZNPS_DP_PREV(p, n) dp_save_restore(p, n)
-#else
-#define ARC_EZNPS_DP_PREV(p, n)
-
-#endif /* !CONFIG_ARC_PLAT_EZNPS */
-
struct task_struct *__switch_to(struct task_struct *p, struct task_struct *n);
#define switch_to(prev, next, last) \
do { \
- ARC_EZNPS_DP_PREV(prev, next); \
dsp_save_restore(prev, next); \
fpu_save_restore(prev, next); \
last = __switch_to(prev, next);\
diff --git a/arch/arc/kernel/ctx_sw.c b/arch/arc/kernel/ctx_sw.c
index e172c3333a84..1a76f2d6f694 100644
--- a/arch/arc/kernel/ctx_sw.c
+++ b/arch/arc/kernel/ctx_sw.c
@@ -14,9 +14,6 @@
#include <asm/asm-offsets.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
-#ifdef CONFIG_ARC_PLAT_EZNPS
-#include <plat/ctop.h>
-#endif
#define KSP_WORD_OFF ((TASK_THREAD + THREAD_KSP) / 4)
@@ -68,16 +65,9 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task)
#ifndef CONFIG_SMP
"st %2, [@_current_task] \n\t"
#else
-#ifdef CONFIG_ARC_PLAT_EZNPS
- "lr r24, [%4] \n\t"
-#ifndef CONFIG_EZNPS_MTM_EXT
- "lsr r24, r24, 4 \n\t"
-#endif
-#else
"lr r24, [identity] \n\t"
"lsr r24, r24, 8 \n\t"
"bmsk r24, r24, 7 \n\t"
-#endif
"add2 r24, @_current_task, r24 \n\t"
"st %2, [r24] \n\t"
#endif
@@ -115,9 +105,6 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task)
: "=r"(tmp)
: "n"(KSP_WORD_OFF), "r"(next), "r"(prev)
-#ifdef CONFIG_ARC_PLAT_EZNPS
- , "i"(CTOP_AUX_LOGIC_GLOBAL_ID)
-#endif
: "blink"
);
diff --git a/arch/arc/kernel/devtree.c b/arch/arc/kernel/devtree.c
index fa86d13df5ed..721d465f1580 100644
--- a/arch/arc/kernel/devtree.c
+++ b/arch/arc/kernel/devtree.c
@@ -29,8 +29,6 @@ static void __init arc_set_early_base_baud(unsigned long dt_root)
else if (of_flat_dt_is_compatible(dt_root, "snps,arc-sdp") ||
of_flat_dt_is_compatible(dt_root, "snps,hsdk"))
arc_base_baud = 33333333; /* Fixed 33MHz clk (AXS10x & HSDK) */
- else if (of_flat_dt_is_compatible(dt_root, "ezchip,arc-nps"))
- arc_base_baud = 800000000; /* Fixed 800MHz clk (NPS) */
else
arc_base_baud = 50000000; /* Fixed default 50MHz */
}
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index efeba1fe7252..37f724ad5e39 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -116,17 +116,6 @@ void arch_cpu_idle(void)
:"I"(arg)); /* can't be "r" has to be embedded const */
}
-#elif defined(CONFIG_EZNPS_MTM_EXT) /* ARC700 variant in NPS */
-
-void arch_cpu_idle(void)
-{
- /* only the calling HW thread needs to sleep */
- __asm__ __volatile__(
- ".word %0 \n"
- :
- :"i"(CTOP_INST_HWSCHD_WFT_IE12));
-}
-
#else /* ARC700 */
void arch_cpu_idle(void)
@@ -278,10 +267,6 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long usp)
*/
regs->status32 = STATUS_U_MASK | STATUS_L_MASK | ISA_INIT_STATUS_BITS;
-#ifdef CONFIG_EZNPS_MTM_EXT
- regs->eflags = 0;
-#endif
-
fpu_init_task(regs);
/* bogus seed values for debugging */
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index eca35e02ce06..52906d314537 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -226,7 +226,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
}
if (!cpu_online(cpu)) {
- pr_info("Timeout: CPU%u FAILED to comeup !!!\n", cpu);
+ pr_info("Timeout: CPU%u FAILED to come up !!!\n", cpu);
return -1;
}
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index 31f54bdd95f2..062fae46c3f8 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -281,13 +281,6 @@ ex_saved_reg1:
.macro COMMIT_ENTRY_TO_MMU
#if (CONFIG_ARC_MMU_VER < 4)
-#ifdef CONFIG_EZNPS_MTM_EXT
- /* verify if entry for this vaddr+ASID already exists */
- sr TLBProbe, [ARC_REG_TLBCOMMAND]
- lr r0, [ARC_REG_TLBINDEX]
- bbit0 r0, 31, 88f
-#endif
-
/* Get free TLB slot: Set = computed from vaddr, way = random */
sr TLBGetIndex, [ARC_REG_TLBCOMMAND]
diff --git a/arch/arc/plat-eznps/Kconfig b/arch/arc/plat-eznps/Kconfig
deleted file mode 100644
index a645bca5899a..000000000000
--- a/arch/arc/plat-eznps/Kconfig
+++ /dev/null
@@ -1,58 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# For a description of the syntax of this configuration file,
-# see Documentation/kbuild/kconfig-language.rst.
-#
-
-menuconfig ARC_PLAT_EZNPS
- bool "\"EZchip\" ARC dev platform"
- depends on ISA_ARCOMPACT
- select CPU_BIG_ENDIAN
- select CLKSRC_NPS if !PHYS_ADDR_T_64BIT
- select EZNPS_GIC
- select EZCHIP_NPS_MANAGEMENT_ENET if ETHERNET
- help
- Support for EZchip development platforms,
- based on ARC700 cores.
- We handle few flavors:
- - Hardware Emulator AKA HE which is FPGA based chassis
- - Simulator based on MetaWare nSIM
- - NPS400 chip based on ASIC
-
-config EZNPS_MTM_EXT
- bool "ARC-EZchip MTM Extensions"
- select CPUMASK_OFFSTACK
- depends on ARC_PLAT_EZNPS && SMP
- default y
- help
- Here we add new hierarchy for CPUs topology.
- We got:
- Core
- Thread
- At the new thread level each CPU represent one HW thread.
- At highest hierarchy each core contain 16 threads,
- any of them seem like CPU from Linux point of view.
- All threads within same core share the execution unit of the
- core and HW scheduler round robin between them.
-
-config EZNPS_MEM_ERROR_ALIGN
- bool "ARC-EZchip Memory error as an exception"
- depends on EZNPS_MTM_EXT
- default n
- help
- On the real chip of the NPS, user memory errors are handled
- as a machine check exception, which is fatal, whereas on
- simulator platform for NPS, is handled as a Level 2 interrupt
- (just a stock ARC700) which is recoverable. This option makes
- simulator behave like hardware.
-
-config EZNPS_SHARED_AUX_REGS
- bool "ARC-EZchip Shared Auxiliary Registers Per Core"
- depends on ARC_PLAT_EZNPS
- default y
- help
- On the real chip of the NPS, auxiliary registers are shared between
- all the cpus of the core, whereas on simulator platform for NPS,
- each cpu has a different set of auxiliary registers. Configuration
- should be unset if auxiliary registers are not shared between the cpus
- of the core, so there will be a need to initialize them per cpu.
diff --git a/arch/arc/plat-eznps/Makefile b/arch/arc/plat-eznps/Makefile
deleted file mode 100644
index ebb9723002cf..000000000000
--- a/arch/arc/plat-eznps/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Makefile for the linux kernel.
-#
-
-obj-y := entry.o platform.o ctop.o
-obj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_EZNPS_MTM_EXT) += mtm.o
diff --git a/arch/arc/plat-eznps/ctop.c b/arch/arc/plat-eznps/ctop.c
deleted file mode 100644
index b398e6e838a9..000000000000
--- a/arch/arc/plat-eznps/ctop.c
+++ /dev/null
@@ -1,21 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright(c) 2015 EZchip Technologies.
- */
-
-#include <linux/sched.h>
-#include <asm/processor.h>
-#include <plat/ctop.h>
-
-void dp_save_restore(struct task_struct *prev, struct task_struct *next)
-{
- struct eznps_dp *prev_task_dp = &prev->thread.dp;
- struct eznps_dp *next_task_dp = &next->thread.dp;
-
- /* Here we save all Data Plane related auxiliary registers */
- prev_task_dp->eflags = read_aux_reg(CTOP_AUX_EFLAGS);
- write_aux_reg(CTOP_AUX_EFLAGS, next_task_dp->eflags);
-
- prev_task_dp->gpa1 = read_aux_reg(CTOP_AUX_GPA1);
- write_aux_reg(CTOP_AUX_GPA1, next_task_dp->gpa1);
-}
diff --git a/arch/arc/plat-eznps/entry.S b/arch/arc/plat-eznps/entry.S
deleted file mode 100644
index 3f18c0108e72..000000000000
--- a/arch/arc/plat-eznps/entry.S
+++ /dev/null
@@ -1,60 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*******************************************************************************
-
- EZNPS CPU startup Code
- Copyright(c) 2012 EZchip Technologies.
-
-
-*******************************************************************************/
-#include <linux/linkage.h>
-#include <asm/entry.h>
-#include <asm/cache.h>
-#include <plat/ctop.h>
-
- .cpu A7
-
- .section .init.text, "ax",@progbits
- .align 1024 ; HW requierment for restart first PC
-
-ENTRY(res_service)
-#if defined(CONFIG_EZNPS_MTM_EXT) && defined(CONFIG_EZNPS_SHARED_AUX_REGS)
- ; There is no work for HW thread id != 0
- lr r3, [CTOP_AUX_THREAD_ID]
- cmp r3, 0
- jne stext
-#endif
-
-#ifdef CONFIG_ARC_HAS_DCACHE
- ; With no cache coherency mechanism D$ need to be used very carefully.
- ; Address space:
- ; 0G-2G: We disable CONFIG_ARC_CACHE_PAGES.
- ; 2G-3G: We disable D$ by setting this bit.
- ; 3G-4G: D$ is disabled by architecture.
- ; FMT are huge pages for user application reside at 0-2G.
- ; Only FMT left as one who can use D$ where each such page got
- ; disable/enable bit for cachability.
- ; Programmer will use FMT pages for private data so cache coherency
- ; would not be a problem.
- ; First thing we invalidate D$
- sr 1, [ARC_REG_DC_IVDC]
- sr HW_COMPLY_KRN_NOT_D_CACHED, [CTOP_AUX_HW_COMPLY]
-#endif
-
-#ifdef CONFIG_SMP
- ; We set logical cpuid to be used by GET_CPUID
- ; We do not use physical cpuid since we want ids to be continious when
- ; it comes to cpus on the same quad cluster.
- ; This is useful for applications that used shared resources of a quad
- ; cluster such SRAMS.
- lr r3, [CTOP_AUX_CORE_ID]
- sr r3, [CTOP_AUX_LOGIC_CORE_ID]
- lr r3, [CTOP_AUX_CLUSTER_ID]
- ; Set logical is acheived by swap of 2 middle bits of cluster id (4 bit)
- ; r3 is used since we use short instruction and we need q-class reg
- .short CTOP_INST_MOV2B_FLIP_R3_B1_B2_INST
- .word CTOP_INST_MOV2B_FLIP_R3_B1_B2_LIMM
- sr r3, [CTOP_AUX_LOGIC_CLUSTER_ID]
-#endif
-
- j stext
-END(res_service)
diff --git a/arch/arc/plat-eznps/include/plat/ctop.h b/arch/arc/plat-eznps/include/plat/ctop.h
deleted file mode 100644
index 77712c5ffe84..000000000000
--- a/arch/arc/plat-eznps/include/plat/ctop.h
+++ /dev/null
@@ -1,208 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright(c) 2015 EZchip Technologies.
- */
-
-#ifndef _PLAT_EZNPS_CTOP_H
-#define _PLAT_EZNPS_CTOP_H
-
-#ifndef CONFIG_ARC_PLAT_EZNPS
-#error "Incorrect ctop.h include"
-#endif
-
-#include <linux/bits.h>
-#include <linux/types.h>
-#include <soc/nps/common.h>
-
-/* core auxiliary registers */
-#ifdef __ASSEMBLY__
-#define CTOP_AUX_BASE (-0x800)
-#else
-#define CTOP_AUX_BASE 0xFFFFF800
-#endif
-
-#define CTOP_AUX_GLOBAL_ID (CTOP_AUX_BASE + 0x000)
-#define CTOP_AUX_CLUSTER_ID (CTOP_AUX_BASE + 0x004)
-#define CTOP_AUX_CORE_ID (CTOP_AUX_BASE + 0x008)
-#define CTOP_AUX_THREAD_ID (CTOP_AUX_BASE + 0x00C)
-#define CTOP_AUX_LOGIC_GLOBAL_ID (CTOP_AUX_BASE + 0x010)
-#define CTOP_AUX_LOGIC_CLUSTER_ID (CTOP_AUX_BASE + 0x014)
-#define CTOP_AUX_LOGIC_CORE_ID (CTOP_AUX_BASE + 0x018)
-#define CTOP_AUX_MT_CTRL (CTOP_AUX_BASE + 0x020)
-#define CTOP_AUX_HW_COMPLY (CTOP_AUX_BASE + 0x024)
-#define CTOP_AUX_DPC (CTOP_AUX_BASE + 0x02C)
-#define CTOP_AUX_LPC (CTOP_AUX_BASE + 0x030)
-#define CTOP_AUX_EFLAGS (CTOP_AUX_BASE + 0x080)
-#define CTOP_AUX_GPA1 (CTOP_AUX_BASE + 0x08C)
-#define CTOP_AUX_UDMC (CTOP_AUX_BASE + 0x300)
-
-/* EZchip core instructions */
-#define CTOP_INST_HWSCHD_WFT_IE12 0x3E6F7344
-#define CTOP_INST_HWSCHD_OFF_R4 0x3C6F00BF
-#define CTOP_INST_HWSCHD_RESTORE_R4 0x3E6F7103
-#define CTOP_INST_SCHD_RW 0x3E6F7004
-#define CTOP_INST_SCHD_RD 0x3E6F7084
-#define CTOP_INST_ASRI_0_R3 0x3B56003E
-#define CTOP_INST_XEX_DI_R2_R2_R3 0x4A664C00
-#define CTOP_INST_EXC_DI_R2_R2_R3 0x4A664C01
-#define CTOP_INST_AADD_DI_R2_R2_R3 0x4A664C02
-#define CTOP_INST_AAND_DI_R2_R2_R3 0x4A664C04
-#define CTOP_INST_AOR_DI_R2_R2_R3 0x4A664C05
-#define CTOP_INST_AXOR_DI_R2_R2_R3 0x4A664C06
-
-/* Do not use D$ for address in 2G-3G */
-#define HW_COMPLY_KRN_NOT_D_CACHED BIT(28)
-
-#define NPS_MSU_EN_CFG 0x80
-#define NPS_CRG_BLKID 0x480
-#define NPS_CRG_SYNC_BIT BIT(0)
-#define NPS_GIM_BLKID 0x5C0
-
-/* GIM registers and fields*/
-#define NPS_GIM_UART_LINE BIT(7)
-#define NPS_GIM_DBG_LAN_EAST_TX_DONE_LINE BIT(10)
-#define NPS_GIM_DBG_LAN_EAST_RX_RDY_LINE BIT(11)
-#define NPS_GIM_DBG_LAN_WEST_TX_DONE_LINE BIT(25)
-#define NPS_GIM_DBG_LAN_WEST_RX_RDY_LINE BIT(26)
-
-#ifndef __ASSEMBLY__
-/* Functional registers definition */
-struct nps_host_reg_mtm_cfg {
- union {
- struct {
- u32 gen:1, gdis:1, clk_gate_dis:1, asb:1,
- __reserved:9, nat:3, ten:16;
- };
- u32 value;
- };
-};
-
-struct nps_host_reg_mtm_cpu_cfg {
- union {
- struct {
- u32 csa:22, dmsid:6, __reserved:3, cs:1;
- };
- u32 value;
- };
-};
-
-struct nps_host_reg_thr_init {
- union {
- struct {
- u32 str:1, __reserved:27, thr_id:4;
- };
- u32 value;
- };
-};
-
-struct nps_host_reg_thr_init_sts {
- union {
- struct {
- u32 bsy:1, err:1, __reserved:26, thr_id:4;
- };
- u32 value;
- };
-};
-
-struct nps_host_reg_msu_en_cfg {
- union {
- struct {
- u32 __reserved1:11,
- rtc_en:1, ipc_en:1, gim_1_en:1,
- gim_0_en:1, ipi_en:1, buff_e_rls_bmuw:1,
- buff_e_alc_bmuw:1, buff_i_rls_bmuw:1, buff_i_alc_bmuw:1,
- buff_e_rls_bmue:1, buff_e_alc_bmue:1, buff_i_rls_bmue:1,
- buff_i_alc_bmue:1, __reserved2:1, buff_e_pre_en:1,
- buff_i_pre_en:1, pmuw_ja_en:1, pmue_ja_en:1,
- pmuw_nj_en:1, pmue_nj_en:1, msu_en:1;
- };
- u32 value;
- };
-};
-
-struct nps_host_reg_gim_p_int_dst {
- union {
- struct {
- u32 int_out_en:1, __reserved1:4,
- is:1, intm:2, __reserved2:4,
- nid:4, __reserved3:4, cid:4,
- __reserved4:4, tid:4;
- };
- u32 value;
- };
-};
-
-/* AUX registers definition */
-struct nps_host_reg_aux_dpc {
- union {
- struct {
- u32 ien:1, men:1, hen:1, reserved:29;
- };
- u32 value;
- };
-};
-
-struct nps_host_reg_aux_udmc {
- union {
- struct {
- u32 dcp:1, cme:1, __reserved:19, nat:3,
- __reserved2:5, dcas:3;
- };
- u32 value;
- };
-};
-
-struct nps_host_reg_aux_mt_ctrl {
- union {
- struct {
- u32 mten:1, hsen:1, scd:1, sten:1,
- st_cnt:8, __reserved:8,
- hs_cnt:8, __reserved1:4;
- };
- u32 value;
- };
-};
-
-struct nps_host_reg_aux_hw_comply {
- union {
- struct {
- u32 me:1, le:1, te:1, knc:1, __reserved:28;
- };
- u32 value;
- };
-};
-
-struct nps_host_reg_aux_lpc {
- union {
- struct {
- u32 mep:1, __reserved:31;
- };
- u32 value;
- };
-};
-
-/* CRG registers */
-#define REG_GEN_PURP_0 nps_host_reg_non_cl(NPS_CRG_BLKID, 0x1BF)
-
-/* GIM registers */
-#define REG_GIM_P_INT_EN_0 nps_host_reg_non_cl(NPS_GIM_BLKID, 0x100)
-#define REG_GIM_P_INT_POL_0 nps_host_reg_non_cl(NPS_GIM_BLKID, 0x110)
-#define REG_GIM_P_INT_SENS_0 nps_host_reg_non_cl(NPS_GIM_BLKID, 0x114)
-#define REG_GIM_P_INT_BLK_0 nps_host_reg_non_cl(NPS_GIM_BLKID, 0x118)
-#define REG_GIM_P_INT_DST_10 nps_host_reg_non_cl(NPS_GIM_BLKID, 0x13A)
-#define REG_GIM_P_INT_DST_11 nps_host_reg_non_cl(NPS_GIM_BLKID, 0x13B)
-#define REG_GIM_P_INT_DST_25 nps_host_reg_non_cl(NPS_GIM_BLKID, 0x149)
-#define REG_GIM_P_INT_DST_26 nps_host_reg_non_cl(NPS_GIM_BLKID, 0x14A)
-
-#else
-
-.macro GET_CPU_ID reg
- lr \reg, [CTOP_AUX_LOGIC_GLOBAL_ID]
-#ifndef CONFIG_EZNPS_MTM_EXT
- lsr \reg, \reg, 4
-#endif
-.endm
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _PLAT_EZNPS_CTOP_H */
diff --git a/arch/arc/plat-eznps/include/plat/mtm.h b/arch/arc/plat-eznps/include/plat/mtm.h
deleted file mode 100644
index 7c55becc891b..000000000000
--- a/arch/arc/plat-eznps/include/plat/mtm.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright(c) 2015 EZchip Technologies.
- */
-
-#ifndef _PLAT_EZNPS_MTM_H
-#define _PLAT_EZNPS_MTM_H
-
-#include <plat/ctop.h>
-
-static inline void *nps_mtm_reg_addr(u32 cpu, u32 reg)
-{
- struct global_id gid;
- u32 core, blkid;
-
- gid.value = cpu;
- core = gid.core;
- blkid = (((core & 0x0C) << 2) | (core & 0x03));
-
- return nps_host_reg(cpu, blkid, reg);
-}
-
-#ifdef CONFIG_EZNPS_MTM_EXT
-#define NPS_CPU_TO_THREAD_NUM(cpu) \
- ({ struct global_id gid; gid.value = cpu; gid.thread; })
-
-/* MTM registers */
-#define MTM_CFG(cpu) nps_mtm_reg_addr(cpu, 0x81)
-#define MTM_THR_INIT(cpu) nps_mtm_reg_addr(cpu, 0x92)
-#define MTM_THR_INIT_STS(cpu) nps_mtm_reg_addr(cpu, 0x93)
-
-#define get_thread(map) map.thread
-#define eznps_max_cpus 4096
-#define eznps_cpus_per_cluster 256
-
-void mtm_enable_core(unsigned int cpu);
-int mtm_enable_thread(int cpu);
-#else /* !CONFIG_EZNPS_MTM_EXT */
-
-#define get_thread(map) 0
-#define eznps_max_cpus 256
-#define eznps_cpus_per_cluster 16
-#define mtm_enable_core(cpu)
-#define mtm_enable_thread(cpu) 1
-#define NPS_CPU_TO_THREAD_NUM(cpu) 0
-
-#endif /* CONFIG_EZNPS_MTM_EXT */
-
-#endif /* _PLAT_EZNPS_MTM_H */
diff --git a/arch/arc/plat-eznps/include/plat/smp.h b/arch/arc/plat-eznps/include/plat/smp.h
deleted file mode 100644
index e433f118bdca..000000000000
--- a/arch/arc/plat-eznps/include/plat/smp.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright(c) 2015 EZchip Technologies.
- */
-
-#ifndef __PLAT_EZNPS_SMP_H
-#define __PLAT_EZNPS_SMP_H
-
-#ifdef CONFIG_SMP
-
-extern void res_service(void);
-
-#endif /* CONFIG_SMP */
-
-#endif
diff --git a/arch/arc/plat-eznps/mtm.c b/arch/arc/plat-eznps/mtm.c
deleted file mode 100644
index 3dcf5a9e2976..000000000000
--- a/arch/arc/plat-eznps/mtm.c
+++ /dev/null
@@ -1,166 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright(c) 2015 EZchip Technologies.
- */
-
-#include <linux/smp.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/io.h>
-#include <linux/log2.h>
-#include <asm/arcregs.h>
-#include <plat/mtm.h>
-#include <plat/smp.h>
-
-#define MT_HS_CNT_MIN 0x01
-#define MT_HS_CNT_MAX 0xFF
-#define MT_CTRL_ST_CNT 0xF
-#define NPS_NUM_HW_THREADS 0x10
-
-static int mtm_hs_ctr = MT_HS_CNT_MAX;
-
-#ifdef CONFIG_EZNPS_MEM_ERROR_ALIGN
-int do_memory_error(unsigned long address, struct pt_regs *regs)
-{
- die("Invalid Mem Access", regs, address);
-
- return 1;
-}
-#endif
-
-static void mtm_init_nat(int cpu)
-{
- struct nps_host_reg_mtm_cfg mtm_cfg;
- struct nps_host_reg_aux_udmc udmc;
- int log_nat, nat = 0, i, t;
-
- /* Iterate core threads and update nat */
- for (i = 0, t = cpu; i < NPS_NUM_HW_THREADS; i++, t++)
- nat += test_bit(t, cpumask_bits(cpu_possible_mask));
-
- log_nat = ilog2(nat);
-
- udmc.value = read_aux_reg(CTOP_AUX_UDMC);
- udmc.nat = log_nat;
- write_aux_reg(CTOP_AUX_UDMC, udmc.value);
-
- mtm_cfg.value = ioread32be(MTM_CFG(cpu));
- mtm_cfg.nat = log_nat;
- iowrite32be(mtm_cfg.value, MTM_CFG(cpu));
-}
-
-static void mtm_init_thread(int cpu)
-{
- int i, tries = 5;
- struct nps_host_reg_thr_init thr_init;
- struct nps_host_reg_thr_init_sts thr_init_sts;
-
- /* Set thread init register */
- thr_init.value = 0;
- iowrite32be(thr_init.value, MTM_THR_INIT(cpu));
- thr_init.thr_id = NPS_CPU_TO_THREAD_NUM(cpu);
- thr_init.str = 1;
- iowrite32be(thr_init.value, MTM_THR_INIT(cpu));
-
- /* Poll till thread init is done */
- for (i = 0; i < tries; i++) {
- thr_init_sts.value = ioread32be(MTM_THR_INIT_STS(cpu));
- if (thr_init_sts.thr_id == thr_init.thr_id) {
- if (thr_init_sts.bsy)
- continue;
- else if (thr_init_sts.err)
- pr_warn("Failed to thread init cpu %u\n", cpu);
- break;
- }
-
- pr_warn("Wrong thread id in thread init for cpu %u\n", cpu);
- break;
- }
-
- if (i == tries)
- pr_warn("Got thread init timeout for cpu %u\n", cpu);
-}
-
-int mtm_enable_thread(int cpu)
-{
- struct nps_host_reg_mtm_cfg mtm_cfg;
-
- if (NPS_CPU_TO_THREAD_NUM(cpu) == 0)
- return 1;
-
- /* Enable thread in mtm */
- mtm_cfg.value = ioread32be(MTM_CFG(cpu));
- mtm_cfg.ten |= (1 << (NPS_CPU_TO_THREAD_NUM(cpu)));
- iowrite32be(mtm_cfg.value, MTM_CFG(cpu));
-
- return 0;
-}
-
-void mtm_enable_core(unsigned int cpu)
-{
- int i;
- struct nps_host_reg_aux_mt_ctrl mt_ctrl;
- struct nps_host_reg_mtm_cfg mtm_cfg;
- struct nps_host_reg_aux_dpc dpc;
-
- /*
- * Initializing dpc register in each CPU.
- * Overwriting the init value of the DPC
- * register so that CMEM and FMT virtual address
- * spaces are accessible, and Data Plane HW
- * facilities are enabled.
- */
- dpc.ien = 1;
- dpc.men = 1;
- write_aux_reg(CTOP_AUX_DPC, dpc.value);
-
- if (NPS_CPU_TO_THREAD_NUM(cpu) != 0)
- return;
-
- /* Initialize Number of Active Threads */
- mtm_init_nat(cpu);
-
- /* Initialize mtm_cfg */
- mtm_cfg.value = ioread32be(MTM_CFG(cpu));
- mtm_cfg.ten = 1;
- iowrite32be(mtm_cfg.value, MTM_CFG(cpu));
-
- /* Initialize all other threads in core */
- for (i = 1; i < NPS_NUM_HW_THREADS; i++)
- mtm_init_thread(cpu + i);
-
-
- /* Enable HW schedule, stall counter, mtm */
- mt_ctrl.value = 0;
- mt_ctrl.hsen = 1;
- mt_ctrl.hs_cnt = mtm_hs_ctr;
- mt_ctrl.mten = 1;
- write_aux_reg(CTOP_AUX_MT_CTRL, mt_ctrl.value);
-
- /*
- * HW scheduling mechanism will start working
- * Only after call to instruction "schd.rw".
- * cpu_relax() calls "schd.rw" instruction.
- */
- cpu_relax();
-}
-
-/* Verify and set the value of the mtm hs counter */
-static int __init set_mtm_hs_ctr(char *ctr_str)
-{
- int hs_ctr;
- int ret;
-
- ret = kstrtoint(ctr_str, 0, &hs_ctr);
-
- if (ret || hs_ctr > MT_HS_CNT_MAX || hs_ctr < MT_HS_CNT_MIN) {
- pr_err("** Invalid @nps_mtm_hs_ctr [%d] needs to be [%d:%d] (incl)\n",
- hs_ctr, MT_HS_CNT_MIN, MT_HS_CNT_MAX);
- return -EINVAL;
- }
-
- mtm_hs_ctr = hs_ctr;
-
- return 0;
-}
-early_param("nps_mtm_hs_ctr", set_mtm_hs_ctr);
diff --git a/arch/arc/plat-eznps/platform.c b/arch/arc/plat-eznps/platform.c
deleted file mode 100644
index 6de2fe840043..000000000000
--- a/arch/arc/plat-eznps/platform.c
+++ /dev/null
@@ -1,91 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright(c) 2015 EZchip Technologies.
- */
-
-#include <linux/init.h>
-#include <linux/io.h>
-#include <asm/mach_desc.h>
-#include <plat/mtm.h>
-
-static void __init eznps_configure_msu(void)
-{
- int cpu;
- struct nps_host_reg_msu_en_cfg msu_en_cfg = {.value = 0};
-
- msu_en_cfg.msu_en = 1;
- msu_en_cfg.ipi_en = 1;
- msu_en_cfg.gim_0_en = 1;
- msu_en_cfg.gim_1_en = 1;
-
- /* enable IPI and GIM messages on all clusters */
- for (cpu = 0 ; cpu < eznps_max_cpus; cpu += eznps_cpus_per_cluster)
- iowrite32be(msu_en_cfg.value,
- nps_host_reg(cpu, NPS_MSU_BLKID, NPS_MSU_EN_CFG));
-}
-
-static void __init eznps_configure_gim(void)
-{
- u32 reg_value;
- u32 gim_int_lines;
- struct nps_host_reg_gim_p_int_dst gim_p_int_dst = {.value = 0};
-
- gim_int_lines = NPS_GIM_UART_LINE;
- gim_int_lines |= NPS_GIM_DBG_LAN_EAST_TX_DONE_LINE;
- gim_int_lines |= NPS_GIM_DBG_LAN_EAST_RX_RDY_LINE;
- gim_int_lines |= NPS_GIM_DBG_LAN_WEST_TX_DONE_LINE;
- gim_int_lines |= NPS_GIM_DBG_LAN_WEST_RX_RDY_LINE;
-
- /*
- * IRQ polarity
- * low or high level
- * negative or positive edge
- */
- reg_value = ioread32be(REG_GIM_P_INT_POL_0);
- reg_value &= ~gim_int_lines;
- iowrite32be(reg_value, REG_GIM_P_INT_POL_0);
-
- /* IRQ type level or edge */
- reg_value = ioread32be(REG_GIM_P_INT_SENS_0);
- reg_value |= NPS_GIM_DBG_LAN_EAST_TX_DONE_LINE;
- reg_value |= NPS_GIM_DBG_LAN_WEST_TX_DONE_LINE;
- iowrite32be(reg_value, REG_GIM_P_INT_SENS_0);
-
- /*
- * GIM interrupt select type for
- * dbg_lan TX and RX interrupts
- * should be type 1
- * type 0 = IRQ line 6
- * type 1 = IRQ line 7
- */
- gim_p_int_dst.is = 1;
- iowrite32be(gim_p_int_dst.value, REG_GIM_P_INT_DST_10);
- iowrite32be(gim_p_int_dst.value, REG_GIM_P_INT_DST_11);
- iowrite32be(gim_p_int_dst.value, REG_GIM_P_INT_DST_25);
- iowrite32be(gim_p_int_dst.value, REG_GIM_P_INT_DST_26);
-
- /*
- * CTOP IRQ lines should be defined
- * as blocking in GIM
- */
- iowrite32be(gim_int_lines, REG_GIM_P_INT_BLK_0);
-
- /* enable CTOP IRQ lines in GIM */
- iowrite32be(gim_int_lines, REG_GIM_P_INT_EN_0);
-}
-
-static void __init eznps_early_init(void)
-{
- eznps_configure_msu();
- eznps_configure_gim();
-}
-
-static const char *eznps_compat[] __initconst = {
- "ezchip,arc-nps",
- NULL,
-};
-
-MACHINE_START(NPS, "nps")
- .dt_compat = eznps_compat,
- .init_early = eznps_early_init,
-MACHINE_END
diff --git a/arch/arc/plat-eznps/smp.c b/arch/arc/plat-eznps/smp.c
deleted file mode 100644
index f119cb7de2ae..000000000000
--- a/arch/arc/plat-eznps/smp.c
+++ /dev/null
@@ -1,138 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright(c) 2015 EZchip Technologies.
- */
-
-#include <linux/smp.h>
-#include <linux/of_fdt.h>
-#include <linux/io.h>
-#include <linux/irqdomain.h>
-#include <asm/irq.h>
-#include <plat/ctop.h>
-#include <plat/smp.h>
-#include <plat/mtm.h>
-
-#define NPS_DEFAULT_MSID 0x34
-#define NPS_MTM_CPU_CFG 0x90
-
-static char smp_cpuinfo_buf[128] = {"Extn [EZNPS-SMP]\t: On\n"};
-
-/* Get cpu map from device tree */
-static int __init eznps_get_map(const char *name, struct cpumask *cpumask)
-{
- unsigned long dt_root = of_get_flat_dt_root();
- const char *buf;
-
- buf = of_get_flat_dt_prop(dt_root, name, NULL);
- if (!buf)
- return 1;
-
- cpulist_parse(buf, cpumask);
-
- return 0;
-}
-
-/* Update board cpu maps */
-static void __init eznps_init_cpumasks(void)
-{
- struct cpumask cpumask;
-
- if (eznps_get_map("present-cpus", &cpumask)) {
- pr_err("Failed to get present-cpus from dtb");
- return;
- }
- init_cpu_present(&cpumask);
-
- if (eznps_get_map("possible-cpus", &cpumask)) {
- pr_err("Failed to get possible-cpus from dtb");
- return;
- }
- init_cpu_possible(&cpumask);
-}
-
-static void eznps_init_core(unsigned int cpu)
-{
- u32 sync_value;
- struct nps_host_reg_aux_hw_comply hw_comply;
- struct nps_host_reg_aux_lpc lpc;
-
- if (NPS_CPU_TO_THREAD_NUM(cpu) != 0)
- return;
-
- hw_comply.value = read_aux_reg(CTOP_AUX_HW_COMPLY);
- hw_comply.me = 1;
- hw_comply.le = 1;
- hw_comply.te = 1;
- write_aux_reg(CTOP_AUX_HW_COMPLY, hw_comply.value);
-
- /* Enable MMU clock */
- lpc.mep = 1;
- write_aux_reg(CTOP_AUX_LPC, lpc.value);
-
- /* Boot CPU only */
- if (!cpu) {
- /* Write to general purpose register in CRG */
- sync_value = ioread32be(REG_GEN_PURP_0);
- sync_value |= NPS_CRG_SYNC_BIT;
- iowrite32be(sync_value, REG_GEN_PURP_0);
- }
-}
-
-/*
- * Master kick starting another CPU
- */
-static void __init eznps_smp_wakeup_cpu(int cpu, unsigned long pc)
-{
- struct nps_host_reg_mtm_cpu_cfg cpu_cfg;
-
- if (mtm_enable_thread(cpu) == 0)
- return;
-
- /* set PC, dmsid, and start CPU */
- cpu_cfg.value = (u32)res_service;
- cpu_cfg.dmsid = NPS_DEFAULT_MSID;
- cpu_cfg.cs = 1;
- iowrite32be(cpu_cfg.value, nps_mtm_reg_addr(cpu, NPS_MTM_CPU_CFG));
-}
-
-static void eznps_ipi_send(int cpu)
-{
- struct global_id gid;
- struct {
- union {
- struct {
- u32 num:8, cluster:8, core:8, thread:8;
- };
- u32 value;
- };
- } ipi;
-
- gid.value = cpu;
- ipi.thread = get_thread(gid);
- ipi.core = gid.core;
- ipi.cluster = nps_cluster_logic_to_phys(gid.cluster);
- ipi.num = NPS_IPI_IRQ;
-
- __asm__ __volatile__(
- " mov r3, %0\n"
- " .word %1\n"
- :
- : "r"(ipi.value), "i"(CTOP_INST_ASRI_0_R3)
- : "r3");
-}
-
-static void eznps_init_per_cpu(int cpu)
-{
- smp_ipi_irq_setup(cpu, NPS_IPI_IRQ);
-
- eznps_init_core(cpu);
- mtm_enable_core(cpu);
-}
-
-struct plat_smp_ops plat_smp_ops = {
- .info = smp_cpuinfo_buf,
- .init_early_smp = eznps_init_cpumasks,
- .cpu_kick = eznps_smp_wakeup_cpu,
- .ipi_send = eznps_ipi_send,
- .init_per_cpu = eznps_init_per_cpu,
-};
diff --git a/arch/arc/plat-hsdk/Kconfig b/arch/arc/plat-hsdk/Kconfig
index ce8101834518..6b5c54576f54 100644
--- a/arch/arc/plat-hsdk/Kconfig
+++ b/arch/arc/plat-hsdk/Kconfig
@@ -8,5 +8,6 @@ menuconfig ARC_SOC_HSDK
select ARC_HAS_ACCL_REGS
select ARC_IRQ_NO_AUTOSAVE
select CLK_HSDK
+ select RESET_CONTROLLER
select RESET_HSDK
select HAVE_PCI
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index 80000a66a4e3..87912e5c2256 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -1546,6 +1546,17 @@ config DEBUG_SIRFSOC_UART
bool
depends on ARCH_SIRF
+config DEBUG_UART_FLOW_CONTROL
+ bool "Enable flow control (CTS) for the debug UART"
+ depends on DEBUG_LL
+ default y if ARCH_EBSA110 || DEBUG_FOOTBRIDGE_COM1 || DEBUG_GEMINI || ARCH_RPC
+ help
+ Some UART ports are connected to terminals that will use modem
+ control signals to indicate whether they are ready to receive text.
+ In practice this means that the terminal is asserting the special
+ control signal CTS (Clear To Send). If your debug UART supports
+ this and your debug terminal will require it, enable this option.
+
config DEBUG_LL_INCLUDE
string
default "debug/sa1100.S" if DEBUG_SA1100
@@ -1893,11 +1904,6 @@ config DEBUG_UART_8250_PALMCHIP
except for having a different register layout. Say Y here if
the debug UART is of this type.
-config DEBUG_UART_8250_FLOW_CONTROL
- bool "Enable flow control for 8250 UART"
- depends on DEBUG_LL_UART_8250 || DEBUG_UART_8250
- default y if ARCH_EBSA110 || DEBUG_FOOTBRIDGE_COM1 || DEBUG_GEMINI || ARCH_RPC
-
config DEBUG_UNCOMPRESS
bool "Enable decompressor debugging via DEBUG_LL output"
depends on ARCH_MULTIPLATFORM || PLAT_SAMSUNG || ARM_SINGLE_ARMV7M
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index e589da3c8949..c4301437ca72 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -143,6 +143,9 @@ head-y := arch/arm/kernel/head$(MMUEXT).o
# Text offset. This list is sorted numerically by address in order to
# provide a means to avoid/resolve conflicts in multi-arch kernels.
+# Note: the 32kB below this value is reserved for use by the kernel
+# during boot, and this offset is critical to the functioning of
+# kexec-tools.
textofs-y := 0x00008000
# We don't want the htc bootloader to corrupt kernel during resume
textofs-$(CONFIG_PM_H1940) := 0x00108000
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 58028abd05d9..47f001ca5499 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -7,11 +7,11 @@
OBJS =
-AFLAGS_head.o += -DTEXT_OFFSET=$(TEXT_OFFSET)
HEAD = head.o
OBJS += misc.o decompress.o
ifeq ($(CONFIG_DEBUG_UNCOMPRESS),y)
OBJS += debug.o
+AFLAGS_head.o += -DDEBUG
endif
FONTC = $(srctree)/lib/fonts/font_acorn_8x8.c
@@ -68,7 +68,12 @@ ZTEXTADDR := 0
ZBSSADDR := ALIGN(8)
endif
+MALLOC_SIZE := 65536
+
+AFLAGS_head.o += -DTEXT_OFFSET=$(TEXT_OFFSET) -DMALLOC_SIZE=$(MALLOC_SIZE)
CPPFLAGS_vmlinux.lds := -DTEXT_START="$(ZTEXTADDR)" -DBSS_START="$(ZBSSADDR)"
+CPPFLAGS_vmlinux.lds += -DTEXT_OFFSET="$(TEXT_OFFSET)"
+CPPFLAGS_vmlinux.lds += -DMALLOC_SIZE="$(MALLOC_SIZE)"
compress-$(CONFIG_KERNEL_GZIP) = gzip
compress-$(CONFIG_KERNEL_LZO) = lzo
diff --git a/arch/arm/boot/compressed/debug.S b/arch/arm/boot/compressed/debug.S
index 6bf2917a4621..fac40a717fcf 100644
--- a/arch/arm/boot/compressed/debug.S
+++ b/arch/arm/boot/compressed/debug.S
@@ -8,7 +8,10 @@
ENTRY(putc)
addruart r1, r2, r3
- waituart r3, r1
+#ifdef CONFIG_DEBUG_UART_FLOW_CONTROL
+ waituartcts r3, r1
+#endif
+ waituarttxrdy r3, r1
senduart r0, r1
busyuart r3, r1
mov pc, lr
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 434a16982e34..2e04ec5b5446 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -28,19 +28,19 @@
#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
.macro loadsp, rb, tmp1, tmp2
.endm
- .macro writeb, ch, rb
+ .macro writeb, ch, rb, tmp
mcr p14, 0, \ch, c0, c5, 0
.endm
#elif defined(CONFIG_CPU_XSCALE)
.macro loadsp, rb, tmp1, tmp2
.endm
- .macro writeb, ch, rb
+ .macro writeb, ch, rb, tmp
mcr p14, 0, \ch, c8, c0, 0
.endm
#else
.macro loadsp, rb, tmp1, tmp2
.endm
- .macro writeb, ch, rb
+ .macro writeb, ch, rb, tmp
mcr p14, 0, \ch, c1, c0, 0
.endm
#endif
@@ -49,8 +49,13 @@
#include CONFIG_DEBUG_LL_INCLUDE
- .macro writeb, ch, rb
+ .macro writeb, ch, rb, tmp
+#ifdef CONFIG_DEBUG_UART_FLOW_CONTROL
+ waituartcts \tmp, \rb
+#endif
+ waituarttxrdy \tmp, \rb
senduart \ch, \rb
+ busyuart \tmp, \rb
.endm
#if defined(CONFIG_ARCH_SA1100)
@@ -81,42 +86,11 @@
bl phex
.endm
- .macro debug_reloc_start
-#ifdef DEBUG
- kputc #'\n'
- kphex r6, 8 /* processor id */
- kputc #':'
- kphex r7, 8 /* architecture id */
-#ifdef CONFIG_CPU_CP15
- kputc #':'
- mrc p15, 0, r0, c1, c0
- kphex r0, 8 /* control reg */
-#endif
- kputc #'\n'
- kphex r5, 8 /* decompressed kernel start */
- kputc #'-'
- kphex r9, 8 /* decompressed kernel end */
- kputc #'>'
- kphex r4, 8 /* kernel execution address */
- kputc #'\n'
-#endif
- .endm
-
- .macro debug_reloc_end
-#ifdef DEBUG
- kphex r5, 8 /* end of kernel */
- kputc #'\n'
- mov r0, r4
- bl memdump /* dump 256 bytes at start of kernel */
-#endif
- .endm
-
/*
* Debug kernel copy by printing the memory addresses involved
*/
.macro dbgkc, begin, end, cbegin, cend
#ifdef DEBUG
- kputc #'\n'
kputc #'C'
kputc #':'
kputc #'0'
@@ -136,7 +110,28 @@
kputc #'x'
kphex \cend, 8 /* End of kernel copy */
kputc #'\n'
- kputc #'\r'
+#endif
+ .endm
+
+ /*
+ * Debug print of the final appended DTB location
+ */
+ .macro dbgadtb, begin, end
+#ifdef DEBUG
+ kputc #'D'
+ kputc #'T'
+ kputc #'B'
+ kputc #':'
+ kputc #'0'
+ kputc #'x'
+ kphex \begin, 8 /* Start of appended DTB */
+ kputc #' '
+ kputc #'('
+ kputc #'0'
+ kputc #'x'
+ kphex \end, 8 /* End of appended DTB */
+ kputc #')'
+ kputc #'\n'
#endif
.endm
@@ -303,7 +298,7 @@ restart: adr r0, LC1
#ifndef CONFIG_ZBOOT_ROM
/* malloc space is above the relocated stack (64k max) */
- add r10, sp, #0x10000
+ add r10, sp, #MALLOC_SIZE
#else
/*
* With ZBOOT_ROM the bss/stack is non relocatable,
@@ -357,6 +352,7 @@ restart: adr r0, LC1
mov r5, r5, ror #8
eor r5, r5, r1, lsr #8
#endif
+ dbgadtb r6, r5
/* 50% DTB growth should be good enough */
add r5, r5, r5, lsr #1
/* preserve 64-bit alignment */
@@ -614,7 +610,7 @@ not_relocated: mov r0, #0
*/
mov r0, r4
mov r1, sp @ malloc space above stack
- add r2, sp, #0x10000 @ 64k max
+ add r2, sp, #MALLOC_SIZE @ 64k max
mov r3, r7
bl decompress_kernel
@@ -1356,7 +1352,7 @@ puts: loadsp r3, r2, r1
1: ldrb r2, [r0], #1
teq r2, #0
moveq pc, lr
-2: writeb r2, r3
+2: writeb r2, r3, r1
mov r1, #0x00020000
3: subs r1, r1, #1
bne 3b
diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S
index b914be3a207b..1bcb68ac4b01 100644
--- a/arch/arm/boot/compressed/vmlinux.lds.S
+++ b/arch/arm/boot/compressed/vmlinux.lds.S
@@ -44,10 +44,12 @@ SECTIONS
}
.table : ALIGN(4) {
_table_start = .;
- LONG(ZIMAGE_MAGIC(4))
+ LONG(ZIMAGE_MAGIC(6))
LONG(ZIMAGE_MAGIC(0x5a534c4b))
LONG(ZIMAGE_MAGIC(__piggy_size_addr - _start))
LONG(ZIMAGE_MAGIC(_kernel_bss_size))
+ LONG(ZIMAGE_MAGIC(TEXT_OFFSET))
+ LONG(ZIMAGE_MAGIC(MALLOC_SIZE))
LONG(0)
_table_end = .;
}
diff --git a/arch/arm/include/debug/8250.S b/arch/arm/include/debug/8250.S
index e4a036f082c2..e3692a37cede 100644
--- a/arch/arm/include/debug/8250.S
+++ b/arch/arm/include/debug/8250.S
@@ -45,10 +45,11 @@
bne 1002b
.endm
- .macro waituart,rd,rx
-#ifdef CONFIG_DEBUG_UART_8250_FLOW_CONTROL
+ .macro waituarttxrdy,rd,rx
+ .endm
+
+ .macro waituartcts,rd,rx
1001: load \rd, [\rx, #UART_MSR << UART_SHIFT]
tst \rd, #UART_MSR_CTS
beq 1001b
-#endif
.endm
diff --git a/arch/arm/include/debug/asm9260.S b/arch/arm/include/debug/asm9260.S
index 0da1eb625331..5a0ce145c44a 100644
--- a/arch/arm/include/debug/asm9260.S
+++ b/arch/arm/include/debug/asm9260.S
@@ -11,7 +11,10 @@
ldr \rv, = CONFIG_DEBUG_UART_VIRT
.endm
- .macro waituart,rd,rx
+ .macro waituarttxrdy,rd,rx
+ .endm
+
+ .macro waituartcts,rd,rx
.endm
.macro senduart,rd,rx
diff --git a/arch/arm/include/debug/at91.S b/arch/arm/include/debug/at91.S
index 6c91cbaaa20b..17722824e2f2 100644
--- a/arch/arm/include/debug/at91.S
+++ b/arch/arm/include/debug/at91.S
@@ -19,12 +19,15 @@
strb \rd, [\rx, #(AT91_DBGU_THR)] @ Write to Transmitter Holding Register
.endm
- .macro waituart,rd,rx
+ .macro waituarttxrdy,rd,rx
1001: ldr \rd, [\rx, #(AT91_DBGU_SR)] @ Read Status Register
tst \rd, #AT91_DBGU_TXRDY @ DBGU_TXRDY = 1 when ready to transmit
beq 1001b
.endm
+ .macro waituartcts,rd,rx
+ .endm
+
.macro busyuart,rd,rx
1001: ldr \rd, [\rx, #(AT91_DBGU_SR)] @ Read Status Register
tst \rd, #AT91_DBGU_TXEMPTY @ DBGU_TXEMPTY = 1 when transmission complete
diff --git a/arch/arm/include/debug/bcm63xx.S b/arch/arm/include/debug/bcm63xx.S
index 06a896227396..da65abb6738d 100644
--- a/arch/arm/include/debug/bcm63xx.S
+++ b/arch/arm/include/debug/bcm63xx.S
@@ -17,12 +17,15 @@
strb \rd, [\rx, #UART_FIFO_REG]
.endm
- .macro waituart, rd, rx
+ .macro waituarttxrdy, rd, rx
1001: ldr \rd, [\rx, #UART_IR_REG]
tst \rd, #(1 << UART_IR_TXEMPTY)
beq 1001b
.endm
+ .macro waituartcts, rd, rx
+ .endm
+
.macro busyuart, rd, rx
1002: ldr \rd, [\rx, #UART_IR_REG]
tst \rd, #(1 << UART_IR_TXTRESH)
diff --git a/arch/arm/include/debug/brcmstb.S b/arch/arm/include/debug/brcmstb.S
index 132a20c4a676..7ffe66993029 100644
--- a/arch/arm/include/debug/brcmstb.S
+++ b/arch/arm/include/debug/brcmstb.S
@@ -142,7 +142,10 @@ ARM_BE8( rev \rd, \rd )
bne 1002b
.endm
- .macro waituart,rd,rx
+ .macro waituarttxrdy,rd,rx
+ .endm
+
+ .macro waituartcts,rd,rx
.endm
/*
diff --git a/arch/arm/include/debug/clps711x.S b/arch/arm/include/debug/clps711x.S
index 774a67ac3877..a983d12a6515 100644
--- a/arch/arm/include/debug/clps711x.S
+++ b/arch/arm/include/debug/clps711x.S
@@ -20,7 +20,10 @@
ldr \rp, =CLPS711X_UART_PADDR
.endm
- .macro waituart,rd,rx
+ .macro waituartcts,rd,rx
+ .endm
+
+ .macro waituarttxrdy,rd,rx
.endm
.macro senduart,rd,rx
diff --git a/arch/arm/include/debug/dc21285.S b/arch/arm/include/debug/dc21285.S
index d7e8c71706ab..4ec0e5e31704 100644
--- a/arch/arm/include/debug/dc21285.S
+++ b/arch/arm/include/debug/dc21285.S
@@ -34,5 +34,8 @@
bne 1001b
.endm
- .macro waituart,rd,rx
+ .macro waituartcts,rd,rx
+ .endm
+
+ .macro waituarttxrdy,rd,rx
.endm
diff --git a/arch/arm/include/debug/digicolor.S b/arch/arm/include/debug/digicolor.S
index 256f5f4da275..443674cad76a 100644
--- a/arch/arm/include/debug/digicolor.S
+++ b/arch/arm/include/debug/digicolor.S
@@ -21,7 +21,10 @@
strb \rd, [\rx, #UA0_EMI_REC]
.endm
- .macro waituart,rd,rx
+ .macro waituartcts,rd,rx
+ .endm
+
+ .macro waituarttxrdy,rd,rx
.endm
.macro busyuart,rd,rx
diff --git a/arch/arm/include/debug/efm32.S b/arch/arm/include/debug/efm32.S
index 5ed5028306f4..b0083d6e31e8 100644
--- a/arch/arm/include/debug/efm32.S
+++ b/arch/arm/include/debug/efm32.S
@@ -29,7 +29,10 @@
strb \rd, [\rx, #UARTn_TXDATA]
.endm
- .macro waituart,rd,rx
+ .macro waituartcts,rd,rx
+ .endm
+
+ .macro waituarttxrdy,rd,rx
1001: ldr \rd, [\rx, #UARTn_STATUS]
tst \rd, #UARTn_STATUS_TXBL
beq 1001b
diff --git a/arch/arm/include/debug/icedcc.S b/arch/arm/include/debug/icedcc.S
index 74a0dd036a17..d5e65da8a687 100644
--- a/arch/arm/include/debug/icedcc.S
+++ b/arch/arm/include/debug/icedcc.S
@@ -23,7 +23,10 @@
beq 1001b
.endm
- .macro waituart, rd, rx
+ .macro waituartcts, rd, rx
+ .endm
+
+ .macro waituarttxrdy, rd, rx
mov \rd, #0x2000000
1001:
subs \rd, \rd, #1
@@ -47,7 +50,10 @@
beq 1001b
.endm
- .macro waituart, rd, rx
+ .macro waituartcts, rd, rx
+ .endm
+
+ .macro waituarttxrdy, rd, rx
mov \rd, #0x10000000
1001:
subs \rd, \rd, #1
@@ -72,7 +78,10 @@
.endm
- .macro waituart, rd, rx
+ .macro waituartcts, rd, rx
+ .endm
+
+ .macro waituarttxrdy, rd, rx
mov \rd, #0x2000000
1001:
subs \rd, \rd, #1
diff --git a/arch/arm/include/debug/imx.S b/arch/arm/include/debug/imx.S
index 1c1b9d1da4c8..bb7b9550580c 100644
--- a/arch/arm/include/debug/imx.S
+++ b/arch/arm/include/debug/imx.S
@@ -35,7 +35,10 @@
str \rd, [\rx, #0x40] @ TXDATA
.endm
- .macro waituart,rd,rx
+ .macro waituartcts,rd,rx
+ .endm
+
+ .macro waituarttxrdy,rd,rx
.endm
.macro busyuart,rd,rx
diff --git a/arch/arm/include/debug/meson.S b/arch/arm/include/debug/meson.S
index 1e501a0054ae..7b60e4401225 100644
--- a/arch/arm/include/debug/meson.S
+++ b/arch/arm/include/debug/meson.S
@@ -25,7 +25,10 @@
beq 1002b
.endm
- .macro waituart,rd,rx
+ .macro waituartcts,rd,rx
+ .endm
+
+ .macro waituarttxrdy,rd,rx
1001: ldr \rd, [\rx, #MESON_AO_UART_STATUS]
tst \rd, #MESON_AO_UART_TX_FIFO_FULL
bne 1001b
diff --git a/arch/arm/include/debug/msm.S b/arch/arm/include/debug/msm.S
index 9405b71461da..530edc74f9a3 100644
--- a/arch/arm/include/debug/msm.S
+++ b/arch/arm/include/debug/msm.S
@@ -17,7 +17,10 @@ ARM_BE8(rev \rd, \rd )
str \rd, [\rx, #0x70]
.endm
- .macro waituart, rd, rx
+ .macro waituartcts,rd,rx
+ .endm
+
+ .macro waituarttxrdy, rd, rx
@ check for TX_EMT in UARTDM_SR
ldr \rd, [\rx, #0x08]
ARM_BE8(rev \rd, \rd )
diff --git a/arch/arm/include/debug/omap2plus.S b/arch/arm/include/debug/omap2plus.S
index b5696a33ba0f..0680be6c79d3 100644
--- a/arch/arm/include/debug/omap2plus.S
+++ b/arch/arm/include/debug/omap2plus.S
@@ -75,5 +75,8 @@ omap_uart_lsr: .word 0
bne 1001b
.endm
- .macro waituart,rd,rx
+ .macro waituartcts,rd,rx
+ .endm
+
+ .macro waituarttxrdy,rd,rx
.endm
diff --git a/arch/arm/include/debug/pl01x.S b/arch/arm/include/debug/pl01x.S
index a2a553afe7b8..0c7bfa4c10db 100644
--- a/arch/arm/include/debug/pl01x.S
+++ b/arch/arm/include/debug/pl01x.S
@@ -26,7 +26,10 @@
strb \rd, [\rx, #UART01x_DR]
.endm
- .macro waituart,rd,rx
+ .macro waituartcts,rd,rx
+ .endm
+
+ .macro waituarttxrdy,rd,rx
1001: ldr \rd, [\rx, #UART01x_FR]
ARM_BE8( rev \rd, \rd )
tst \rd, #UART01x_FR_TXFF
diff --git a/arch/arm/include/debug/renesas-scif.S b/arch/arm/include/debug/renesas-scif.S
index 25f06663a9a4..8e433e981bbe 100644
--- a/arch/arm/include/debug/renesas-scif.S
+++ b/arch/arm/include/debug/renesas-scif.S
@@ -33,7 +33,10 @@
ldr \rv, =SCIF_VIRT
.endm
- .macro waituart, rd, rx
+ .macro waituartcts,rd,rx
+ .endm
+
+ .macro waituarttxrdy, rd, rx
1001: ldrh \rd, [\rx, #FSR]
tst \rd, #TDFE
beq 1001b
diff --git a/arch/arm/include/debug/sa1100.S b/arch/arm/include/debug/sa1100.S
index 6109e6058e5b..7968ea52df3d 100644
--- a/arch/arm/include/debug/sa1100.S
+++ b/arch/arm/include/debug/sa1100.S
@@ -51,7 +51,10 @@
str \rd, [\rx, #UTDR]
.endm
- .macro waituart,rd,rx
+ .macro waituartcts,rd,rx
+ .endm
+
+ .macro waituarttxrdy,rd,rx
1001: ldr \rd, [\rx, #UTSR1]
tst \rd, #UTSR1_TNF
beq 1001b
diff --git a/arch/arm/include/debug/samsung.S b/arch/arm/include/debug/samsung.S
index 69201d7fb48f..ab474d564a90 100644
--- a/arch/arm/include/debug/samsung.S
+++ b/arch/arm/include/debug/samsung.S
@@ -69,7 +69,10 @@ ARM_BE8(rev \rd, \rd)
1002: @ exit busyuart
.endm
- .macro waituart,rd,rx
+ .macro waituartcts,rd,rx
+ .endm
+
+ .macro waituarttxrdy,rd,rx
ldr \rd, [\rx, # S3C2410_UFCON]
ARM_BE8(rev \rd, \rd)
tst \rd, #S3C2410_UFCON_FIFOMODE @ fifo enabled?
diff --git a/arch/arm/include/debug/sirf.S b/arch/arm/include/debug/sirf.S
index e73e4de0a015..3612c7b9cbe7 100644
--- a/arch/arm/include/debug/sirf.S
+++ b/arch/arm/include/debug/sirf.S
@@ -29,7 +29,10 @@
.macro busyuart,rd,rx
.endm
- .macro waituart,rd,rx
+ .macro waituartcts,rd,rx
+ .endm
+
+ .macro waituarttxrdy,rd,rx
1001: ldr \rd, [\rx, #SIRF_LLUART_TXFIFO_STATUS]
tst \rd, #SIRF_LLUART_TXFIFO_EMPTY
beq 1001b
diff --git a/arch/arm/include/debug/sti.S b/arch/arm/include/debug/sti.S
index 6b42c91f217d..72d052511890 100644
--- a/arch/arm/include/debug/sti.S
+++ b/arch/arm/include/debug/sti.S
@@ -45,7 +45,10 @@
strb \rd, [\rx, #ASC_TX_BUF_OFF]
.endm
- .macro waituart,rd,rx
+ .macro waituartcts,rd,rx
+ .endm
+
+ .macro waituarttxrdy,rd,rx
1001: ldr \rd, [\rx, #ASC_STA_OFF]
tst \rd, #ASC_STA_TX_FULL
bne 1001b
diff --git a/arch/arm/include/debug/stm32.S b/arch/arm/include/debug/stm32.S
index f3c4a37210ed..b6d9df30e37d 100644
--- a/arch/arm/include/debug/stm32.S
+++ b/arch/arm/include/debug/stm32.S
@@ -27,7 +27,10 @@
strb \rd, [\rx, #STM32_USART_TDR_OFF]
.endm
-.macro waituart,rd,rx
+.macro waituartcts,rd,rx
+.endm
+
+.macro waituarttxrdy,rd,rx
1001: ldr \rd, [\rx, #(STM32_USART_SR_OFF)] @ Read Status Register
tst \rd, #STM32_USART_TXE @ TXE = 1 = tx empty
beq 1001b
diff --git a/arch/arm/include/debug/tegra.S b/arch/arm/include/debug/tegra.S
index 2148d0f88591..98daa7f48314 100644
--- a/arch/arm/include/debug/tegra.S
+++ b/arch/arm/include/debug/tegra.S
@@ -178,15 +178,16 @@
1002:
.endm
- .macro waituart, rd, rx
-#ifdef FLOW_CONTROL
+ .macro waituartcts, rd, rx
cmp \rx, #0
beq 1002f
1001: ldrb \rd, [\rx, #UART_MSR << UART_SHIFT]
tst \rd, #UART_MSR_CTS
beq 1001b
1002:
-#endif
+ .endm
+
+ .macro waituarttxrdy,rd,rx
.endm
/*
diff --git a/arch/arm/include/debug/vf.S b/arch/arm/include/debug/vf.S
index 854d9bd82770..035bcbf117ab 100644
--- a/arch/arm/include/debug/vf.S
+++ b/arch/arm/include/debug/vf.S
@@ -29,5 +29,8 @@
beq 1001b @ wait until transmit done
.endm
- .macro waituart,rd,rx
+ .macro waituartcts,rd,rx
+ .endm
+
+ .macro waituarttxrdy,rd,rx
.endm
diff --git a/arch/arm/include/debug/vt8500.S b/arch/arm/include/debug/vt8500.S
index 8dc1df2d91b8..d01094fdbc8c 100644
--- a/arch/arm/include/debug/vt8500.S
+++ b/arch/arm/include/debug/vt8500.S
@@ -28,7 +28,10 @@
bne 1001b
.endm
- .macro waituart,rd,rx
+ .macro waituartcts,rd,rx
+ .endm
+
+ .macro waituarttxrdy,rd,rx
.endm
#endif
diff --git a/arch/arm/include/debug/zynq.S b/arch/arm/include/debug/zynq.S
index 58d77c972fd6..5d42cc35ecf3 100644
--- a/arch/arm/include/debug/zynq.S
+++ b/arch/arm/include/debug/zynq.S
@@ -33,7 +33,10 @@
strb \rd, [\rx, #UART_FIFO_OFFSET] @ TXDATA
.endm
- .macro waituart,rd,rx
+ .macro waituartcts,rd,rx
+ .endm
+
+ .macro waituarttxrdy,rd,rx
1001: ldr \rd, [\rx, #UART_SR_OFFSET]
ARM_BE8( rev \rd, \rd )
tst \rd, #UART_SR_TXEMPTY
diff --git a/arch/arm/kernel/debug.S b/arch/arm/kernel/debug.S
index e112072b579d..d92f44bdf438 100644
--- a/arch/arm/kernel/debug.S
+++ b/arch/arm/kernel/debug.S
@@ -89,11 +89,18 @@ ENTRY(printascii)
2: teq r1, #'\n'
bne 3f
mov r1, #'\r'
- waituart r2, r3
+#ifdef CONFIG_DEBUG_UART_FLOW_CONTROL
+ waituartcts r2, r3
+#endif
+ waituarttxrdy r2, r3
senduart r1, r3
busyuart r2, r3
mov r1, #'\n'
-3: waituart r2, r3
+3:
+#ifdef CONFIG_DEBUG_UART_FLOW_CONTROL
+ waituartcts r2, r3
+#endif
+ waituarttxrdy r2, r3
senduart r1, r3
busyuart r2, r3
b 1b
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 7a4853b1213a..08660ae9dcbc 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -683,6 +683,40 @@ static void disable_single_step(struct perf_event *bp)
arch_install_hw_breakpoint(bp);
}
+/*
+ * Arm32 hardware does not always report a watchpoint hit address that matches
+ * one of the watchpoints set. It can also report an address "near" the
+ * watchpoint if a single instruction access both watched and unwatched
+ * addresses. There is no straight-forward way, short of disassembling the
+ * offending instruction, to map that address back to the watchpoint. This
+ * function computes the distance of the memory access from the watchpoint as a
+ * heuristic for the likelyhood that a given access triggered the watchpoint.
+ *
+ * See this same function in the arm64 platform code, which has the same
+ * problem.
+ *
+ * The function returns the distance of the address from the bytes watched by
+ * the watchpoint. In case of an exact match, it returns 0.
+ */
+static u32 get_distance_from_watchpoint(unsigned long addr, u32 val,
+ struct arch_hw_breakpoint_ctrl *ctrl)
+{
+ u32 wp_low, wp_high;
+ u32 lens, lene;
+
+ lens = __ffs(ctrl->len);
+ lene = __fls(ctrl->len);
+
+ wp_low = val + lens;
+ wp_high = val + lene;
+ if (addr < wp_low)
+ return wp_low - addr;
+ else if (addr > wp_high)
+ return addr - wp_high;
+ else
+ return 0;
+}
+
static int watchpoint_fault_on_uaccess(struct pt_regs *regs,
struct arch_hw_breakpoint *info)
{
@@ -692,23 +726,25 @@ static int watchpoint_fault_on_uaccess(struct pt_regs *regs,
static void watchpoint_handler(unsigned long addr, unsigned int fsr,
struct pt_regs *regs)
{
- int i, access;
- u32 val, ctrl_reg, alignment_mask;
+ int i, access, closest_match = 0;
+ u32 min_dist = -1, dist;
+ u32 val, ctrl_reg;
struct perf_event *wp, **slots;
struct arch_hw_breakpoint *info;
struct arch_hw_breakpoint_ctrl ctrl;
slots = this_cpu_ptr(wp_on_reg);
+ /*
+ * Find all watchpoints that match the reported address. If no exact
+ * match is found. Attribute the hit to the closest watchpoint.
+ */
+ rcu_read_lock();
for (i = 0; i < core_num_wrps; ++i) {
- rcu_read_lock();
-
wp = slots[i];
-
if (wp == NULL)
- goto unlock;
+ continue;
- info = counter_arch_bp(wp);
/*
* The DFAR is an unknown value on debug architectures prior
* to 7.1. Since we only allow a single watchpoint on these
@@ -717,33 +753,31 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr,
*/
if (debug_arch < ARM_DEBUG_ARCH_V7_1) {
BUG_ON(i > 0);
+ info = counter_arch_bp(wp);
info->trigger = wp->attr.bp_addr;
} else {
- if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
- alignment_mask = 0x7;
- else
- alignment_mask = 0x3;
-
- /* Check if the watchpoint value matches. */
- val = read_wb_reg(ARM_BASE_WVR + i);
- if (val != (addr & ~alignment_mask))
- goto unlock;
-
- /* Possible match, check the byte address select. */
- ctrl_reg = read_wb_reg(ARM_BASE_WCR + i);
- decode_ctrl_reg(ctrl_reg, &ctrl);
- if (!((1 << (addr & alignment_mask)) & ctrl.len))
- goto unlock;
-
/* Check that the access type matches. */
if (debug_exception_updates_fsr()) {
access = (fsr & ARM_FSR_ACCESS_MASK) ?
HW_BREAKPOINT_W : HW_BREAKPOINT_R;
if (!(access & hw_breakpoint_type(wp)))
- goto unlock;
+ continue;
}
+ val = read_wb_reg(ARM_BASE_WVR + i);
+ ctrl_reg = read_wb_reg(ARM_BASE_WCR + i);
+ decode_ctrl_reg(ctrl_reg, &ctrl);
+ dist = get_distance_from_watchpoint(addr, val, &ctrl);
+ if (dist < min_dist) {
+ min_dist = dist;
+ closest_match = i;
+ }
+ /* Is this an exact match? */
+ if (dist != 0)
+ continue;
+
/* We have a winner. */
+ info = counter_arch_bp(wp);
info->trigger = addr;
}
@@ -765,13 +799,23 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr,
* we can single-step over the watchpoint trigger.
*/
if (!is_default_overflow_handler(wp))
- goto unlock;
-
+ continue;
step:
enable_single_step(wp, instruction_pointer(regs));
-unlock:
- rcu_read_unlock();
}
+
+ if (min_dist > 0 && min_dist != -1) {
+ /* No exact match found. */
+ wp = slots[closest_match];
+ info = counter_arch_bp(wp);
+ info->trigger = addr;
+ pr_debug("watchpoint fired: address = 0x%x\n", info->trigger);
+ perf_bp_event(wp, regs);
+ if (is_default_overflow_handler(wp))
+ enable_single_step(wp, instruction_pointer(regs));
+ }
+
+ rcu_read_unlock();
}
static void watchpoint_single_step_handler(unsigned long pc)
diff --git a/arch/arm/mach-davinci/board-da830-evm.c b/arch/arm/mach-davinci/board-da830-evm.c
index 1076886938b6..a20ba12d876c 100644
--- a/arch/arm/mach-davinci/board-da830-evm.c
+++ b/arch/arm/mach-davinci/board-da830-evm.c
@@ -306,7 +306,7 @@ static struct davinci_nand_pdata da830_evm_nand_pdata = {
.core_chipsel = 1,
.parts = da830_evm_nand_partitions,
.nr_parts = ARRAY_SIZE(da830_evm_nand_partitions),
- .ecc_mode = NAND_ECC_HW,
+ .engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST,
.ecc_bits = 4,
.bbt_options = NAND_BBT_USE_FLASH,
.bbt_td = &da830_evm_nand_bbt_main_descr,
diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c
index 6751292e5f8f..428012687a80 100644
--- a/arch/arm/mach-davinci/board-da850-evm.c
+++ b/arch/arm/mach-davinci/board-da850-evm.c
@@ -239,7 +239,7 @@ static struct davinci_nand_pdata da850_evm_nandflash_data = {
.core_chipsel = 1,
.parts = da850_evm_nandflash_partition,
.nr_parts = ARRAY_SIZE(da850_evm_nandflash_partition),
- .ecc_mode = NAND_ECC_HW,
+ .engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST,
.ecc_bits = 4,
.bbt_options = NAND_BBT_USE_FLASH,
.timing = &da850_evm_nandflash_timing,
diff --git a/arch/arm/mach-davinci/board-dm355-evm.c b/arch/arm/mach-davinci/board-dm355-evm.c
index 5113273fda69..3c5a9e3c128a 100644
--- a/arch/arm/mach-davinci/board-dm355-evm.c
+++ b/arch/arm/mach-davinci/board-dm355-evm.c
@@ -82,7 +82,7 @@ static struct davinci_nand_pdata davinci_nand_data = {
.mask_chipsel = BIT(14),
.parts = davinci_nand_partitions,
.nr_parts = ARRAY_SIZE(davinci_nand_partitions),
- .ecc_mode = NAND_ECC_HW,
+ .engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST,
.bbt_options = NAND_BBT_USE_FLASH,
.ecc_bits = 4,
};
diff --git a/arch/arm/mach-davinci/board-dm355-leopard.c b/arch/arm/mach-davinci/board-dm355-leopard.c
index b9e9950dd300..e475b2113e70 100644
--- a/arch/arm/mach-davinci/board-dm355-leopard.c
+++ b/arch/arm/mach-davinci/board-dm355-leopard.c
@@ -76,7 +76,8 @@ static struct davinci_nand_pdata davinci_nand_data = {
.mask_chipsel = BIT(14),
.parts = davinci_nand_partitions,
.nr_parts = ARRAY_SIZE(davinci_nand_partitions),
- .ecc_mode = NAND_ECC_HW_SYNDROME,
+ .engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST,
+ .ecc_placement = NAND_ECC_PLACEMENT_INTERLEAVED,
.ecc_bits = 4,
.bbt_options = NAND_BBT_USE_FLASH,
};
diff --git a/arch/arm/mach-davinci/board-dm365-evm.c b/arch/arm/mach-davinci/board-dm365-evm.c
index 2328b15ac067..bdf31eb77620 100644
--- a/arch/arm/mach-davinci/board-dm365-evm.c
+++ b/arch/arm/mach-davinci/board-dm365-evm.c
@@ -146,7 +146,7 @@ static struct davinci_nand_pdata davinci_nand_data = {
.mask_chipsel = BIT(14),
.parts = davinci_nand_partitions,
.nr_parts = ARRAY_SIZE(davinci_nand_partitions),
- .ecc_mode = NAND_ECC_HW,
+ .engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST,
.bbt_options = NAND_BBT_USE_FLASH,
.ecc_bits = 4,
};
diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c
index a5d3708fedf6..bcb3c4070945 100644
--- a/arch/arm/mach-davinci/board-dm644x-evm.c
+++ b/arch/arm/mach-davinci/board-dm644x-evm.c
@@ -162,7 +162,7 @@ static struct davinci_nand_pdata davinci_evm_nandflash_data = {
.core_chipsel = 0,
.parts = davinci_evm_nandflash_partition,
.nr_parts = ARRAY_SIZE(davinci_evm_nandflash_partition),
- .ecc_mode = NAND_ECC_HW,
+ .engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST,
.ecc_bits = 1,
.bbt_options = NAND_BBT_USE_FLASH,
.timing = &davinci_evm_nandflash_timing,
diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c
index dd7d60f4139a..8319a6067a68 100644
--- a/arch/arm/mach-davinci/board-dm646x-evm.c
+++ b/arch/arm/mach-davinci/board-dm646x-evm.c
@@ -91,7 +91,7 @@ static struct davinci_nand_pdata davinci_nand_data = {
.mask_ale = 0x40000,
.parts = davinci_nand_partitions,
.nr_parts = ARRAY_SIZE(davinci_nand_partitions),
- .ecc_mode = NAND_ECC_HW,
+ .engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST,
.ecc_bits = 1,
.options = 0,
};
diff --git a/arch/arm/mach-davinci/board-mityomapl138.c b/arch/arm/mach-davinci/board-mityomapl138.c
index 3382b93d9a2a..5205008c8061 100644
--- a/arch/arm/mach-davinci/board-mityomapl138.c
+++ b/arch/arm/mach-davinci/board-mityomapl138.c
@@ -432,7 +432,7 @@ static struct davinci_nand_pdata mityomapl138_nandflash_data = {
.core_chipsel = 1,
.parts = mityomapl138_nandflash_partition,
.nr_parts = ARRAY_SIZE(mityomapl138_nandflash_partition),
- .ecc_mode = NAND_ECC_HW,
+ .engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST,
.bbt_options = NAND_BBT_USE_FLASH,
.options = NAND_BUSWIDTH_16,
.ecc_bits = 1, /* 4 bit mode is not supported with 16 bit NAND */
diff --git a/arch/arm/mach-davinci/board-neuros-osd2.c b/arch/arm/mach-davinci/board-neuros-osd2.c
index 6cf46bbc7e1d..b4843f68bb57 100644
--- a/arch/arm/mach-davinci/board-neuros-osd2.c
+++ b/arch/arm/mach-davinci/board-neuros-osd2.c
@@ -90,7 +90,7 @@ static struct davinci_nand_pdata davinci_ntosd2_nandflash_data = {
.core_chipsel = 0,
.parts = davinci_ntosd2_nandflash_partition,
.nr_parts = ARRAY_SIZE(davinci_ntosd2_nandflash_partition),
- .ecc_mode = NAND_ECC_HW,
+ .engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST,
.ecc_bits = 1,
.bbt_options = NAND_BBT_USE_FLASH,
};
diff --git a/arch/arm/mach-davinci/board-omapl138-hawk.c b/arch/arm/mach-davinci/board-omapl138-hawk.c
index 6c79039002c9..88df8011a4e6 100644
--- a/arch/arm/mach-davinci/board-omapl138-hawk.c
+++ b/arch/arm/mach-davinci/board-omapl138-hawk.c
@@ -206,7 +206,7 @@ static struct davinci_nand_pdata omapl138_hawk_nandflash_data = {
.core_chipsel = 1,
.parts = omapl138_hawk_nandflash_partition,
.nr_parts = ARRAY_SIZE(omapl138_hawk_nandflash_partition),
- .ecc_mode = NAND_ECC_HW,
+ .engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST,
.ecc_bits = 4,
.bbt_options = NAND_BBT_USE_FLASH,
.options = NAND_BUSWIDTH_16,
diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index 3d2c108e911e..431709725d02 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -369,6 +369,15 @@ static struct pxaficp_platform_data tosa_ficp_platform_data = {
/*
* Tosa AC IN
*/
+static struct gpiod_lookup_table tosa_power_gpiod_table = {
+ .dev_id = "gpio-charger",
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", TOSA_GPIO_AC_IN,
+ NULL, GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
static char *tosa_ac_supplied_to[] = {
"main-battery",
"backup-battery",
@@ -378,8 +387,6 @@ static char *tosa_ac_supplied_to[] = {
static struct gpio_charger_platform_data tosa_power_data = {
.name = "charger",
.type = POWER_SUPPLY_TYPE_MAINS,
- .gpio = TOSA_GPIO_AC_IN,
- .gpio_active_low = 1,
.supplied_to = tosa_ac_supplied_to,
.num_supplicants = ARRAY_SIZE(tosa_ac_supplied_to),
};
@@ -951,6 +958,7 @@ static void __init tosa_init(void)
clk_add_alias("CLK_CK3P6MI", tc6393xb_device.name, "GPIO11_CLK", NULL);
gpiod_add_lookup_table(&tosa_udc_gpiod_table);
+ gpiod_add_lookup_table(&tosa_power_gpiod_table);
platform_add_devices(devices, ARRAY_SIZE(devices));
}
diff --git a/arch/arm/mach-s3c24xx/common-smdk.c b/arch/arm/mach-s3c24xx/common-smdk.c
index 75064dfaceb1..121646ad1bb1 100644
--- a/arch/arm/mach-s3c24xx/common-smdk.c
+++ b/arch/arm/mach-s3c24xx/common-smdk.c
@@ -191,7 +191,7 @@ static struct s3c2410_platform_nand smdk_nand_info = {
.twrph1 = 20,
.nr_sets = ARRAY_SIZE(smdk_nand_sets),
.sets = smdk_nand_sets,
- .ecc_mode = NAND_ECC_SOFT,
+ .engine_type = NAND_ECC_ENGINE_TYPE_SOFT,
};
/* devices we initialise */
diff --git a/arch/arm/mach-s3c24xx/mach-anubis.c b/arch/arm/mach-s3c24xx/mach-anubis.c
index 072966dcad78..28326241e360 100644
--- a/arch/arm/mach-s3c24xx/mach-anubis.c
+++ b/arch/arm/mach-s3c24xx/mach-anubis.c
@@ -218,7 +218,7 @@ static struct s3c2410_platform_nand __initdata anubis_nand_info = {
.nr_sets = ARRAY_SIZE(anubis_nand_sets),
.sets = anubis_nand_sets,
.select_chip = anubis_nand_select,
- .ecc_mode = NAND_ECC_SOFT,
+ .engine_type = NAND_ECC_ENGINE_TYPE_SOFT,
};
/* IDE channels */
diff --git a/arch/arm/mach-s3c24xx/mach-at2440evb.c b/arch/arm/mach-s3c24xx/mach-at2440evb.c
index 58c5ef3cf1d7..04dedebdb57c 100644
--- a/arch/arm/mach-s3c24xx/mach-at2440evb.c
+++ b/arch/arm/mach-s3c24xx/mach-at2440evb.c
@@ -109,7 +109,7 @@ static struct s3c2410_platform_nand __initdata at2440evb_nand_info = {
.twrph1 = 40,
.nr_sets = ARRAY_SIZE(at2440evb_nand_sets),
.sets = at2440evb_nand_sets,
- .ecc_mode = NAND_ECC_SOFT,
+ .engine_type = NAND_ECC_ENGINE_TYPE_SOFT,
};
/* DM9000AEP 10/100 ethernet controller */
diff --git a/arch/arm/mach-s3c24xx/mach-bast.c b/arch/arm/mach-s3c24xx/mach-bast.c
index a7c3955ae8f6..6465eab0ab3a 100644
--- a/arch/arm/mach-s3c24xx/mach-bast.c
+++ b/arch/arm/mach-s3c24xx/mach-bast.c
@@ -294,7 +294,7 @@ static struct s3c2410_platform_nand __initdata bast_nand_info = {
.nr_sets = ARRAY_SIZE(bast_nand_sets),
.sets = bast_nand_sets,
.select_chip = bast_nand_select,
- .ecc_mode = NAND_ECC_SOFT,
+ .engine_type = NAND_ECC_ENGINE_TYPE_SOFT,
};
/* DM9000 */
diff --git a/arch/arm/mach-s3c24xx/mach-gta02.c b/arch/arm/mach-s3c24xx/mach-gta02.c
index 526fd0933289..732748170751 100644
--- a/arch/arm/mach-s3c24xx/mach-gta02.c
+++ b/arch/arm/mach-s3c24xx/mach-gta02.c
@@ -417,7 +417,7 @@ static struct s3c2410_platform_nand __initdata gta02_nand_info = {
.twrph1 = 15,
.nr_sets = ARRAY_SIZE(gta02_nand_sets),
.sets = gta02_nand_sets,
- .ecc_mode = NAND_ECC_SOFT,
+ .engine_type = NAND_ECC_ENGINE_TYPE_SOFT,
};
diff --git a/arch/arm/mach-s3c24xx/mach-jive.c b/arch/arm/mach-s3c24xx/mach-jive.c
index 885e8f12e4b9..8233dcff19e7 100644
--- a/arch/arm/mach-s3c24xx/mach-jive.c
+++ b/arch/arm/mach-s3c24xx/mach-jive.c
@@ -228,7 +228,7 @@ static struct s3c2410_platform_nand __initdata jive_nand_info = {
.twrph1 = 40,
.sets = jive_nand_sets,
.nr_sets = ARRAY_SIZE(jive_nand_sets),
- .ecc_mode = NAND_ECC_SOFT,
+ .engine_type = NAND_ECC_ENGINE_TYPE_SOFT,
};
static int __init jive_mtdset(char *options)
diff --git a/arch/arm/mach-s3c24xx/mach-mini2440.c b/arch/arm/mach-s3c24xx/mach-mini2440.c
index 235749448311..057dcbaf1b22 100644
--- a/arch/arm/mach-s3c24xx/mach-mini2440.c
+++ b/arch/arm/mach-s3c24xx/mach-mini2440.c
@@ -296,7 +296,7 @@ static struct s3c2410_platform_nand mini2440_nand_info __initdata = {
.nr_sets = ARRAY_SIZE(mini2440_nand_sets),
.sets = mini2440_nand_sets,
.ignore_unset_ecc = 1,
- .ecc_mode = NAND_ECC_HW,
+ .engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST,
};
/* DM9000AEP 10/100 ethernet controller */
diff --git a/arch/arm/mach-s3c24xx/mach-osiris.c b/arch/arm/mach-s3c24xx/mach-osiris.c
index ee3630cb236a..157448827f61 100644
--- a/arch/arm/mach-s3c24xx/mach-osiris.c
+++ b/arch/arm/mach-s3c24xx/mach-osiris.c
@@ -234,7 +234,7 @@ static struct s3c2410_platform_nand __initdata osiris_nand_info = {
.nr_sets = ARRAY_SIZE(osiris_nand_sets),
.sets = osiris_nand_sets,
.select_chip = osiris_nand_select,
- .ecc_mode = NAND_ECC_SOFT,
+ .engine_type = NAND_ECC_ENGINE_TYPE_SOFT,
};
/* PCMCIA control and configuration */
diff --git a/arch/arm/mach-s3c24xx/mach-qt2410.c b/arch/arm/mach-s3c24xx/mach-qt2410.c
index ff9e3197309b..f3131d94e90b 100644
--- a/arch/arm/mach-s3c24xx/mach-qt2410.c
+++ b/arch/arm/mach-s3c24xx/mach-qt2410.c
@@ -287,7 +287,7 @@ static struct s3c2410_platform_nand __initdata qt2410_nand_info = {
.twrph1 = 20,
.nr_sets = ARRAY_SIZE(qt2410_nand_sets),
.sets = qt2410_nand_sets,
- .ecc_mode = NAND_ECC_SOFT,
+ .engine_type = NAND_ECC_ENGINE_TYPE_SOFT,
};
/* UDC */
diff --git a/arch/arm/mach-s3c24xx/mach-rx1950.c b/arch/arm/mach-s3c24xx/mach-rx1950.c
index e9806bf654b4..3645b9c838d9 100644
--- a/arch/arm/mach-s3c24xx/mach-rx1950.c
+++ b/arch/arm/mach-s3c24xx/mach-rx1950.c
@@ -620,7 +620,7 @@ static struct s3c2410_platform_nand rx1950_nand_info = {
.twrph1 = 15,
.nr_sets = ARRAY_SIZE(rx1950_nand_sets),
.sets = rx1950_nand_sets,
- .ecc_mode = NAND_ECC_SOFT,
+ .engine_type = NAND_ECC_ENGINE_TYPE_SOFT,
};
static struct s3c2410_udc_mach_info rx1950_udc_cfg __initdata = {
diff --git a/arch/arm/mach-s3c24xx/mach-rx3715.c b/arch/arm/mach-s3c24xx/mach-rx3715.c
index 995f1ff34a1b..017010d67e01 100644
--- a/arch/arm/mach-s3c24xx/mach-rx3715.c
+++ b/arch/arm/mach-s3c24xx/mach-rx3715.c
@@ -158,7 +158,7 @@ static struct s3c2410_platform_nand __initdata rx3715_nand_info = {
.twrph1 = 15,
.nr_sets = ARRAY_SIZE(rx3715_nand_sets),
.sets = rx3715_nand_sets,
- .ecc_mode = NAND_ECC_SOFT,
+ .engine_type = NAND_ECC_ENGINE_TYPE_SOFT,
};
static struct platform_device *rx3715_devices[] __initdata = {
diff --git a/arch/arm/mach-s3c24xx/mach-vstms.c b/arch/arm/mach-s3c24xx/mach-vstms.c
index d76b28b65e65..c5fa215a527e 100644
--- a/arch/arm/mach-s3c24xx/mach-vstms.c
+++ b/arch/arm/mach-s3c24xx/mach-vstms.c
@@ -112,7 +112,7 @@ static struct s3c2410_platform_nand __initdata vstms_nand_info = {
.twrph1 = 20,
.nr_sets = ARRAY_SIZE(vstms_nand_sets),
.sets = vstms_nand_sets,
- .ecc_mode = NAND_ECC_SOFT,
+ .engine_type = NAND_ECC_ENGINE_TYPE_SOFT,
};
static struct platform_device *vstms_devices[] __initdata = {
diff --git a/arch/arm/mach-s3c64xx/mach-hmt.c b/arch/arm/mach-s3c64xx/mach-hmt.c
index e7080215c624..0d9acaf91701 100644
--- a/arch/arm/mach-s3c64xx/mach-hmt.c
+++ b/arch/arm/mach-s3c64xx/mach-hmt.c
@@ -199,7 +199,7 @@ static struct s3c2410_platform_nand hmt_nand_info = {
.twrph1 = 40,
.nr_sets = ARRAY_SIZE(hmt_nand_sets),
.sets = hmt_nand_sets,
- .ecc_mode = NAND_ECC_SOFT,
+ .engine_type = NAND_ECC_ENGINE_TYPE_SOFT,
};
static struct gpio_led hmt_leds[] = {
diff --git a/arch/arm/mach-s3c64xx/mach-mini6410.c b/arch/arm/mach-s3c64xx/mach-mini6410.c
index 0dd36ae49e6a..6fbb57878746 100644
--- a/arch/arm/mach-s3c64xx/mach-mini6410.c
+++ b/arch/arm/mach-s3c64xx/mach-mini6410.c
@@ -136,7 +136,7 @@ static struct s3c2410_platform_nand mini6410_nand_info = {
.twrph1 = 40,
.nr_sets = ARRAY_SIZE(mini6410_nand_sets),
.sets = mini6410_nand_sets,
- .ecc_mode = NAND_ECC_SOFT,
+ .engine_type = NAND_ECC_ENGINE_TYPE_SOFT,
};
static struct s3c_fb_pd_win mini6410_lcd_type0_fb_win = {
diff --git a/arch/arm/mach-s3c64xx/mach-real6410.c b/arch/arm/mach-s3c64xx/mach-real6410.c
index 0ff88b6859c4..1e98e530a6aa 100644
--- a/arch/arm/mach-s3c64xx/mach-real6410.c
+++ b/arch/arm/mach-s3c64xx/mach-real6410.c
@@ -188,7 +188,7 @@ static struct s3c2410_platform_nand real6410_nand_info = {
.twrph1 = 40,
.nr_sets = ARRAY_SIZE(real6410_nand_sets),
.sets = real6410_nand_sets,
- .ecc_mode = NAND_ECC_SOFT,
+ .engine_type = NAND_ECC_ENGINE_TYPE_SOFT,
};
static struct platform_device *real6410_devices[] __initdata = {
diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c
index 3cc2b71e16f0..bd3a52fd09ce 100644
--- a/arch/arm/mach-sa1100/collie.c
+++ b/arch/arm/mach-sa1100/collie.c
@@ -30,6 +30,7 @@
#include <linux/gpio_keys.h>
#include <linux/input.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/power/gpio-charger.h>
#include <video/sa1100fb.h>
@@ -131,16 +132,23 @@ static struct irda_platform_data collie_ir_data = {
/*
* Collie AC IN
*/
+static struct gpiod_lookup_table collie_power_gpiod_table = {
+ .dev_id = "gpio-charger",
+ .table = {
+ GPIO_LOOKUP("gpio", COLLIE_GPIO_AC_IN,
+ NULL, GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
static char *collie_ac_supplied_to[] = {
"main-battery",
"backup-battery",
};
-
static struct gpio_charger_platform_data collie_power_data = {
.name = "charger",
.type = POWER_SUPPLY_TYPE_MAINS,
- .gpio = COLLIE_GPIO_AC_IN,
.supplied_to = collie_ac_supplied_to,
.num_supplicants = ARRAY_SIZE(collie_ac_supplied_to),
};
@@ -386,6 +394,8 @@ static void __init collie_init(void)
platform_scoop_config = &collie_pcmcia_config;
+ gpiod_add_lookup_table(&collie_power_gpiod_table);
+
ret = platform_add_devices(devices, ARRAY_SIZE(devices));
if (ret) {
printk(KERN_WARNING "collie: Unable to register LoCoMo device\n");
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 12c26eb88afb..43d91bfd2360 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -1249,20 +1249,28 @@ static void __init l2c310_of_parse(const struct device_node *np,
ret = of_property_read_u32(np, "prefetch-data", &val);
if (ret == 0) {
- if (val)
+ if (val) {
prefetch |= L310_PREFETCH_CTRL_DATA_PREFETCH;
- else
+ *aux_val |= L310_PREFETCH_CTRL_DATA_PREFETCH;
+ } else {
prefetch &= ~L310_PREFETCH_CTRL_DATA_PREFETCH;
+ *aux_val &= ~L310_PREFETCH_CTRL_DATA_PREFETCH;
+ }
+ *aux_mask &= ~L310_PREFETCH_CTRL_DATA_PREFETCH;
} else if (ret != -EINVAL) {
pr_err("L2C-310 OF prefetch-data property value is missing\n");
}
ret = of_property_read_u32(np, "prefetch-instr", &val);
if (ret == 0) {
- if (val)
+ if (val) {
prefetch |= L310_PREFETCH_CTRL_INSTR_PREFETCH;
- else
+ *aux_val |= L310_PREFETCH_CTRL_INSTR_PREFETCH;
+ } else {
prefetch &= ~L310_PREFETCH_CTRL_INSTR_PREFETCH;
+ *aux_val &= ~L310_PREFETCH_CTRL_INSTR_PREFETCH;
+ }
+ *aux_mask &= ~L310_PREFETCH_CTRL_INSTR_PREFETCH;
} else if (ret != -EINVAL) {
pr_err("L2C-310 OF prefetch-instr property value is missing\n");
}
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 698cc740c6b8..ab69250a86bc 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -17,7 +17,6 @@
#include <asm/cp15.h>
#include <asm/cputype.h>
-#include <asm/sections.h>
#include <asm/cachetype.h>
#include <asm/fixmap.h>
#include <asm/sections.h>
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 171077cbf419..d056a548358e 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -453,3 +453,4 @@
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
+440 common process_madvise sys_process_madvise
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 3b859596840d..b3b2019f8d16 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -38,7 +38,7 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
-#define __NR_compat_syscalls 440
+#define __NR_compat_syscalls 441
#endif
#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 2a3ad9b9accd..107f08e03b9f 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -887,6 +887,8 @@ __SYSCALL(__NR_openat2, sys_openat2)
__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
#define __NR_faccessat2 439
__SYSCALL(__NR_faccessat2, sys_faccessat2)
+#define __NR_process_madvise 440
+__SYSCALL(__NR_process_madvise, sys_process_madvise)
/*
* Please add new compat syscalls above this comment and update
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 81901c5e5426..c89bd5f8cbf8 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -40,7 +40,7 @@ obj-y += esi_stub.o # must be in kernel proper
endif
obj-$(CONFIG_INTEL_IOMMU) += pci-dma.o
-obj-$(CONFIG_BINFMT_ELF) += elfcore.o
+obj-$(CONFIG_ELF_CORE) += elfcore.o
# fp_emulate() expects f2-f5,f16-f31 to contain the user-level state.
CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index 4799c96c325f..b96ed8b8a508 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -360,3 +360,4 @@
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
+440 common process_madvise sys_process_madvise
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index d8686bf3ae2f..ef12e097f318 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -537,7 +537,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg)
if (map_start < map_end)
memmap_init_zone((unsigned long)(map_end - map_start),
args->nid, args->zone, page_to_pfn(map_start),
- MEMINIT_EARLY, NULL);
+ MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
return 0;
}
@@ -547,7 +547,7 @@ memmap_init (unsigned long size, int nid, unsigned long zone,
{
if (!vmem_map) {
memmap_init_zone(size, nid, zone, start_pfn,
- MEMINIT_EARLY, NULL);
+ MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
} else {
struct page *start;
struct memmap_init_callback_data args;
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 93bbb74ea876..c830705bd3ac 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -31,6 +31,7 @@ config M68K
select NO_DMA if !MMU && !COLDFIRE
select OLD_SIGACTION
select OLD_SIGSUSPEND3
+ select UACCESS_MEMCPY if !MMU
select VIRT_TO_BUS
config CPU_BIG_ENDIAN
diff --git a/arch/m68k/coldfire/device.c b/arch/m68k/coldfire/device.c
index 9ef4ec0aea00..59f7dfe50a4d 100644
--- a/arch/m68k/coldfire/device.c
+++ b/arch/m68k/coldfire/device.c
@@ -554,7 +554,7 @@ static struct platform_device mcf_edma = {
};
#endif /* IS_ENABLED(CONFIG_MCF_EDMA) */
-#if IS_ENABLED(CONFIG_MMC)
+#ifdef MCFSDHC_BASE
static struct mcf_esdhc_platform_data mcf_esdhc_data = {
.max_bus_width = 4,
.cd_type = ESDHC_CD_NONE,
@@ -579,7 +579,7 @@ static struct platform_device mcf_esdhc = {
.resource = mcf_esdhc_resources,
.dev.platform_data = &mcf_esdhc_data,
};
-#endif /* IS_ENABLED(CONFIG_MMC) */
+#endif /* MCFSDHC_BASE */
static struct platform_device *mcf_devices[] __initdata = {
&mcf_uart,
@@ -613,7 +613,7 @@ static struct platform_device *mcf_devices[] __initdata = {
#if IS_ENABLED(CONFIG_MCF_EDMA)
&mcf_edma,
#endif
-#if IS_ENABLED(CONFIG_MMC)
+#ifdef MCFSDHC_BASE
&mcf_esdhc,
#endif
};
diff --git a/arch/m68k/include/asm/uaccess.h b/arch/m68k/include/asm/uaccess.h
index e896466a41a4..f98208ccbbcd 100644
--- a/arch/m68k/include/asm/uaccess.h
+++ b/arch/m68k/include/asm/uaccess.h
@@ -1,7 +1,397 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifdef __uClinux__
-#include <asm/uaccess_no.h>
+#ifndef __M68K_UACCESS_H
+#define __M68K_UACCESS_H
+
+#ifdef CONFIG_MMU
+
+/*
+ * User space memory access functions
+ */
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/segment.h>
+#include <asm/extable.h>
+
+/* We let the MMU do all checking */
+static inline int access_ok(const void __user *addr,
+ unsigned long size)
+{
+ return 1;
+}
+
+/*
+ * Not all varients of the 68k family support the notion of address spaces.
+ * The traditional 680x0 parts do, and they use the sfc/dfc registers and
+ * the "moves" instruction to access user space from kernel space. Other
+ * family members like ColdFire don't support this, and only have a single
+ * address space, and use the usual "move" instruction for user space access.
+ *
+ * Outside of this difference the user space access functions are the same.
+ * So lets keep the code simple and just define in what we need to use.
+ */
+#ifdef CONFIG_CPU_HAS_ADDRESS_SPACES
+#define MOVES "moves"
#else
-#include <asm/uaccess_mm.h>
+#define MOVES "move"
#endif
-#include <asm/extable.h>
+
+extern int __put_user_bad(void);
+extern int __get_user_bad(void);
+
+#define __put_user_asm(res, x, ptr, bwl, reg, err) \
+asm volatile ("\n" \
+ "1: "MOVES"."#bwl" %2,%1\n" \
+ "2:\n" \
+ " .section .fixup,\"ax\"\n" \
+ " .even\n" \
+ "10: moveq.l %3,%0\n" \
+ " jra 2b\n" \
+ " .previous\n" \
+ "\n" \
+ " .section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 1b,10b\n" \
+ " .long 2b,10b\n" \
+ " .previous" \
+ : "+d" (res), "=m" (*(ptr)) \
+ : #reg (x), "i" (err))
+
+/*
+ * These are the main single-value transfer routines. They automatically
+ * use the right size if we just have the right pointer type.
+ */
+
+#define __put_user(x, ptr) \
+({ \
+ typeof(*(ptr)) __pu_val = (x); \
+ int __pu_err = 0; \
+ __chk_user_ptr(ptr); \
+ switch (sizeof (*(ptr))) { \
+ case 1: \
+ __put_user_asm(__pu_err, __pu_val, ptr, b, d, -EFAULT); \
+ break; \
+ case 2: \
+ __put_user_asm(__pu_err, __pu_val, ptr, w, r, -EFAULT); \
+ break; \
+ case 4: \
+ __put_user_asm(__pu_err, __pu_val, ptr, l, r, -EFAULT); \
+ break; \
+ case 8: \
+ { \
+ const void __user *__pu_ptr = (ptr); \
+ asm volatile ("\n" \
+ "1: "MOVES".l %2,(%1)+\n" \
+ "2: "MOVES".l %R2,(%1)\n" \
+ "3:\n" \
+ " .section .fixup,\"ax\"\n" \
+ " .even\n" \
+ "10: movel %3,%0\n" \
+ " jra 3b\n" \
+ " .previous\n" \
+ "\n" \
+ " .section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 1b,10b\n" \
+ " .long 2b,10b\n" \
+ " .long 3b,10b\n" \
+ " .previous" \
+ : "+d" (__pu_err), "+a" (__pu_ptr) \
+ : "r" (__pu_val), "i" (-EFAULT) \
+ : "memory"); \
+ break; \
+ } \
+ default: \
+ __pu_err = __put_user_bad(); \
+ break; \
+ } \
+ __pu_err; \
+})
+#define put_user(x, ptr) __put_user(x, ptr)
+
+
+#define __get_user_asm(res, x, ptr, type, bwl, reg, err) ({ \
+ type __gu_val; \
+ asm volatile ("\n" \
+ "1: "MOVES"."#bwl" %2,%1\n" \
+ "2:\n" \
+ " .section .fixup,\"ax\"\n" \
+ " .even\n" \
+ "10: move.l %3,%0\n" \
+ " sub.l %1,%1\n" \
+ " jra 2b\n" \
+ " .previous\n" \
+ "\n" \
+ " .section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 1b,10b\n" \
+ " .previous" \
+ : "+d" (res), "=&" #reg (__gu_val) \
+ : "m" (*(ptr)), "i" (err)); \
+ (x) = (__force typeof(*(ptr)))(__force unsigned long)__gu_val; \
+})
+
+#define __get_user(x, ptr) \
+({ \
+ int __gu_err = 0; \
+ __chk_user_ptr(ptr); \
+ switch (sizeof(*(ptr))) { \
+ case 1: \
+ __get_user_asm(__gu_err, x, ptr, u8, b, d, -EFAULT); \
+ break; \
+ case 2: \
+ __get_user_asm(__gu_err, x, ptr, u16, w, r, -EFAULT); \
+ break; \
+ case 4: \
+ __get_user_asm(__gu_err, x, ptr, u32, l, r, -EFAULT); \
+ break; \
+ case 8: { \
+ const void __user *__gu_ptr = (ptr); \
+ union { \
+ u64 l; \
+ __typeof__(*(ptr)) t; \
+ } __gu_val; \
+ asm volatile ("\n" \
+ "1: "MOVES".l (%2)+,%1\n" \
+ "2: "MOVES".l (%2),%R1\n" \
+ "3:\n" \
+ " .section .fixup,\"ax\"\n" \
+ " .even\n" \
+ "10: move.l %3,%0\n" \
+ " sub.l %1,%1\n" \
+ " sub.l %R1,%R1\n" \
+ " jra 3b\n" \
+ " .previous\n" \
+ "\n" \
+ " .section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 1b,10b\n" \
+ " .long 2b,10b\n" \
+ " .previous" \
+ : "+d" (__gu_err), "=&r" (__gu_val.l), \
+ "+a" (__gu_ptr) \
+ : "i" (-EFAULT) \
+ : "memory"); \
+ (x) = __gu_val.t; \
+ break; \
+ } \
+ default: \
+ __gu_err = __get_user_bad(); \
+ break; \
+ } \
+ __gu_err; \
+})
+#define get_user(x, ptr) __get_user(x, ptr)
+
+unsigned long __generic_copy_from_user(void *to, const void __user *from, unsigned long n);
+unsigned long __generic_copy_to_user(void __user *to, const void *from, unsigned long n);
+
+#define __suffix0
+#define __suffix1 b
+#define __suffix2 w
+#define __suffix4 l
+
+#define ____constant_copy_from_user_asm(res, to, from, tmp, n1, n2, n3, s1, s2, s3)\
+ asm volatile ("\n" \
+ "1: "MOVES"."#s1" (%2)+,%3\n" \
+ " move."#s1" %3,(%1)+\n" \
+ " .ifnc \""#s2"\",\"\"\n" \
+ "2: "MOVES"."#s2" (%2)+,%3\n" \
+ " move."#s2" %3,(%1)+\n" \
+ " .ifnc \""#s3"\",\"\"\n" \
+ "3: "MOVES"."#s3" (%2)+,%3\n" \
+ " move."#s3" %3,(%1)+\n" \
+ " .endif\n" \
+ " .endif\n" \
+ "4:\n" \
+ " .section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 1b,10f\n" \
+ " .ifnc \""#s2"\",\"\"\n" \
+ " .long 2b,20f\n" \
+ " .ifnc \""#s3"\",\"\"\n" \
+ " .long 3b,30f\n" \
+ " .endif\n" \
+ " .endif\n" \
+ " .previous\n" \
+ "\n" \
+ " .section .fixup,\"ax\"\n" \
+ " .even\n" \
+ "10: addq.l #"#n1",%0\n" \
+ " .ifnc \""#s2"\",\"\"\n" \
+ "20: addq.l #"#n2",%0\n" \
+ " .ifnc \""#s3"\",\"\"\n" \
+ "30: addq.l #"#n3",%0\n" \
+ " .endif\n" \
+ " .endif\n" \
+ " jra 4b\n" \
+ " .previous\n" \
+ : "+d" (res), "+&a" (to), "+a" (from), "=&d" (tmp) \
+ : : "memory")
+
+#define ___constant_copy_from_user_asm(res, to, from, tmp, n1, n2, n3, s1, s2, s3)\
+ ____constant_copy_from_user_asm(res, to, from, tmp, n1, n2, n3, s1, s2, s3)
+#define __constant_copy_from_user_asm(res, to, from, tmp, n1, n2, n3) \
+ ___constant_copy_from_user_asm(res, to, from, tmp, n1, n2, n3, \
+ __suffix##n1, __suffix##n2, __suffix##n3)
+
+static __always_inline unsigned long
+__constant_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+ unsigned long res = 0, tmp;
+
+ switch (n) {
+ case 1:
+ __constant_copy_from_user_asm(res, to, from, tmp, 1, 0, 0);
+ break;
+ case 2:
+ __constant_copy_from_user_asm(res, to, from, tmp, 2, 0, 0);
+ break;
+ case 3:
+ __constant_copy_from_user_asm(res, to, from, tmp, 2, 1, 0);
+ break;
+ case 4:
+ __constant_copy_from_user_asm(res, to, from, tmp, 4, 0, 0);
+ break;
+ case 5:
+ __constant_copy_from_user_asm(res, to, from, tmp, 4, 1, 0);
+ break;
+ case 6:
+ __constant_copy_from_user_asm(res, to, from, tmp, 4, 2, 0);
+ break;
+ case 7:
+ __constant_copy_from_user_asm(res, to, from, tmp, 4, 2, 1);
+ break;
+ case 8:
+ __constant_copy_from_user_asm(res, to, from, tmp, 4, 4, 0);
+ break;
+ case 9:
+ __constant_copy_from_user_asm(res, to, from, tmp, 4, 4, 1);
+ break;
+ case 10:
+ __constant_copy_from_user_asm(res, to, from, tmp, 4, 4, 2);
+ break;
+ case 12:
+ __constant_copy_from_user_asm(res, to, from, tmp, 4, 4, 4);
+ break;
+ default:
+ /* we limit the inlined version to 3 moves */
+ return __generic_copy_from_user(to, from, n);
+ }
+
+ return res;
+}
+
+#define __constant_copy_to_user_asm(res, to, from, tmp, n, s1, s2, s3) \
+ asm volatile ("\n" \
+ " move."#s1" (%2)+,%3\n" \
+ "11: "MOVES"."#s1" %3,(%1)+\n" \
+ "12: move."#s2" (%2)+,%3\n" \
+ "21: "MOVES"."#s2" %3,(%1)+\n" \
+ "22:\n" \
+ " .ifnc \""#s3"\",\"\"\n" \
+ " move."#s3" (%2)+,%3\n" \
+ "31: "MOVES"."#s3" %3,(%1)+\n" \
+ "32:\n" \
+ " .endif\n" \
+ "4:\n" \
+ "\n" \
+ " .section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 11b,5f\n" \
+ " .long 12b,5f\n" \
+ " .long 21b,5f\n" \
+ " .long 22b,5f\n" \
+ " .ifnc \""#s3"\",\"\"\n" \
+ " .long 31b,5f\n" \
+ " .long 32b,5f\n" \
+ " .endif\n" \
+ " .previous\n" \
+ "\n" \
+ " .section .fixup,\"ax\"\n" \
+ " .even\n" \
+ "5: moveq.l #"#n",%0\n" \
+ " jra 4b\n" \
+ " .previous\n" \
+ : "+d" (res), "+a" (to), "+a" (from), "=&d" (tmp) \
+ : : "memory")
+
+static __always_inline unsigned long
+__constant_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ unsigned long res = 0, tmp;
+
+ switch (n) {
+ case 1:
+ __put_user_asm(res, *(u8 *)from, (u8 __user *)to, b, d, 1);
+ break;
+ case 2:
+ __put_user_asm(res, *(u16 *)from, (u16 __user *)to, w, r, 2);
+ break;
+ case 3:
+ __constant_copy_to_user_asm(res, to, from, tmp, 3, w, b,);
+ break;
+ case 4:
+ __put_user_asm(res, *(u32 *)from, (u32 __user *)to, l, r, 4);
+ break;
+ case 5:
+ __constant_copy_to_user_asm(res, to, from, tmp, 5, l, b,);
+ break;
+ case 6:
+ __constant_copy_to_user_asm(res, to, from, tmp, 6, l, w,);
+ break;
+ case 7:
+ __constant_copy_to_user_asm(res, to, from, tmp, 7, l, w, b);
+ break;
+ case 8:
+ __constant_copy_to_user_asm(res, to, from, tmp, 8, l, l,);
+ break;
+ case 9:
+ __constant_copy_to_user_asm(res, to, from, tmp, 9, l, l, b);
+ break;
+ case 10:
+ __constant_copy_to_user_asm(res, to, from, tmp, 10, l, l, w);
+ break;
+ case 12:
+ __constant_copy_to_user_asm(res, to, from, tmp, 12, l, l, l);
+ break;
+ default:
+ /* limit the inlined version to 3 moves */
+ return __generic_copy_to_user(to, from, n);
+ }
+
+ return res;
+}
+
+static inline unsigned long
+raw_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+ if (__builtin_constant_p(n))
+ return __constant_copy_from_user(to, from, n);
+ return __generic_copy_from_user(to, from, n);
+}
+
+static inline unsigned long
+raw_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ if (__builtin_constant_p(n))
+ return __constant_copy_to_user(to, from, n);
+ return __generic_copy_to_user(to, from, n);
+}
+#define INLINE_COPY_FROM_USER
+#define INLINE_COPY_TO_USER
+
+#define user_addr_max() \
+ (uaccess_kernel() ? ~0UL : TASK_SIZE)
+
+extern long strncpy_from_user(char *dst, const char __user *src, long count);
+extern __must_check long strnlen_user(const char __user *str, long n);
+
+unsigned long __clear_user(void __user *to, unsigned long n);
+
+#define clear_user __clear_user
+
+#else /* !CONFIG_MMU */
+#include <asm-generic/uaccess.h>
+#endif
+
+#endif /* _M68K_UACCESS_H */
diff --git a/arch/m68k/include/asm/uaccess_mm.h b/arch/m68k/include/asm/uaccess_mm.h
deleted file mode 100644
index 9ae9f8d05925..000000000000
--- a/arch/m68k/include/asm/uaccess_mm.h
+++ /dev/null
@@ -1,390 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __M68K_UACCESS_H
-#define __M68K_UACCESS_H
-
-/*
- * User space memory access functions
- */
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <asm/segment.h>
-
-/* We let the MMU do all checking */
-static inline int access_ok(const void __user *addr,
- unsigned long size)
-{
- return 1;
-}
-
-/*
- * Not all varients of the 68k family support the notion of address spaces.
- * The traditional 680x0 parts do, and they use the sfc/dfc registers and
- * the "moves" instruction to access user space from kernel space. Other
- * family members like ColdFire don't support this, and only have a single
- * address space, and use the usual "move" instruction for user space access.
- *
- * Outside of this difference the user space access functions are the same.
- * So lets keep the code simple and just define in what we need to use.
- */
-#ifdef CONFIG_CPU_HAS_ADDRESS_SPACES
-#define MOVES "moves"
-#else
-#define MOVES "move"
-#endif
-
-extern int __put_user_bad(void);
-extern int __get_user_bad(void);
-
-#define __put_user_asm(res, x, ptr, bwl, reg, err) \
-asm volatile ("\n" \
- "1: "MOVES"."#bwl" %2,%1\n" \
- "2:\n" \
- " .section .fixup,\"ax\"\n" \
- " .even\n" \
- "10: moveq.l %3,%0\n" \
- " jra 2b\n" \
- " .previous\n" \
- "\n" \
- " .section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 1b,10b\n" \
- " .long 2b,10b\n" \
- " .previous" \
- : "+d" (res), "=m" (*(ptr)) \
- : #reg (x), "i" (err))
-
-/*
- * These are the main single-value transfer routines. They automatically
- * use the right size if we just have the right pointer type.
- */
-
-#define __put_user(x, ptr) \
-({ \
- typeof(*(ptr)) __pu_val = (x); \
- int __pu_err = 0; \
- __chk_user_ptr(ptr); \
- switch (sizeof (*(ptr))) { \
- case 1: \
- __put_user_asm(__pu_err, __pu_val, ptr, b, d, -EFAULT); \
- break; \
- case 2: \
- __put_user_asm(__pu_err, __pu_val, ptr, w, r, -EFAULT); \
- break; \
- case 4: \
- __put_user_asm(__pu_err, __pu_val, ptr, l, r, -EFAULT); \
- break; \
- case 8: \
- { \
- const void __user *__pu_ptr = (ptr); \
- asm volatile ("\n" \
- "1: "MOVES".l %2,(%1)+\n" \
- "2: "MOVES".l %R2,(%1)\n" \
- "3:\n" \
- " .section .fixup,\"ax\"\n" \
- " .even\n" \
- "10: movel %3,%0\n" \
- " jra 3b\n" \
- " .previous\n" \
- "\n" \
- " .section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 1b,10b\n" \
- " .long 2b,10b\n" \
- " .long 3b,10b\n" \
- " .previous" \
- : "+d" (__pu_err), "+a" (__pu_ptr) \
- : "r" (__pu_val), "i" (-EFAULT) \
- : "memory"); \
- break; \
- } \
- default: \
- __pu_err = __put_user_bad(); \
- break; \
- } \
- __pu_err; \
-})
-#define put_user(x, ptr) __put_user(x, ptr)
-
-
-#define __get_user_asm(res, x, ptr, type, bwl, reg, err) ({ \
- type __gu_val; \
- asm volatile ("\n" \
- "1: "MOVES"."#bwl" %2,%1\n" \
- "2:\n" \
- " .section .fixup,\"ax\"\n" \
- " .even\n" \
- "10: move.l %3,%0\n" \
- " sub.l %1,%1\n" \
- " jra 2b\n" \
- " .previous\n" \
- "\n" \
- " .section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 1b,10b\n" \
- " .previous" \
- : "+d" (res), "=&" #reg (__gu_val) \
- : "m" (*(ptr)), "i" (err)); \
- (x) = (__force typeof(*(ptr)))(__force unsigned long)__gu_val; \
-})
-
-#define __get_user(x, ptr) \
-({ \
- int __gu_err = 0; \
- __chk_user_ptr(ptr); \
- switch (sizeof(*(ptr))) { \
- case 1: \
- __get_user_asm(__gu_err, x, ptr, u8, b, d, -EFAULT); \
- break; \
- case 2: \
- __get_user_asm(__gu_err, x, ptr, u16, w, r, -EFAULT); \
- break; \
- case 4: \
- __get_user_asm(__gu_err, x, ptr, u32, l, r, -EFAULT); \
- break; \
- case 8: { \
- const void __user *__gu_ptr = (ptr); \
- union { \
- u64 l; \
- __typeof__(*(ptr)) t; \
- } __gu_val; \
- asm volatile ("\n" \
- "1: "MOVES".l (%2)+,%1\n" \
- "2: "MOVES".l (%2),%R1\n" \
- "3:\n" \
- " .section .fixup,\"ax\"\n" \
- " .even\n" \
- "10: move.l %3,%0\n" \
- " sub.l %1,%1\n" \
- " sub.l %R1,%R1\n" \
- " jra 3b\n" \
- " .previous\n" \
- "\n" \
- " .section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 1b,10b\n" \
- " .long 2b,10b\n" \
- " .previous" \
- : "+d" (__gu_err), "=&r" (__gu_val.l), \
- "+a" (__gu_ptr) \
- : "i" (-EFAULT) \
- : "memory"); \
- (x) = __gu_val.t; \
- break; \
- } \
- default: \
- __gu_err = __get_user_bad(); \
- break; \
- } \
- __gu_err; \
-})
-#define get_user(x, ptr) __get_user(x, ptr)
-
-unsigned long __generic_copy_from_user(void *to, const void __user *from, unsigned long n);
-unsigned long __generic_copy_to_user(void __user *to, const void *from, unsigned long n);
-
-#define __suffix0
-#define __suffix1 b
-#define __suffix2 w
-#define __suffix4 l
-
-#define ____constant_copy_from_user_asm(res, to, from, tmp, n1, n2, n3, s1, s2, s3)\
- asm volatile ("\n" \
- "1: "MOVES"."#s1" (%2)+,%3\n" \
- " move."#s1" %3,(%1)+\n" \
- " .ifnc \""#s2"\",\"\"\n" \
- "2: "MOVES"."#s2" (%2)+,%3\n" \
- " move."#s2" %3,(%1)+\n" \
- " .ifnc \""#s3"\",\"\"\n" \
- "3: "MOVES"."#s3" (%2)+,%3\n" \
- " move."#s3" %3,(%1)+\n" \
- " .endif\n" \
- " .endif\n" \
- "4:\n" \
- " .section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 1b,10f\n" \
- " .ifnc \""#s2"\",\"\"\n" \
- " .long 2b,20f\n" \
- " .ifnc \""#s3"\",\"\"\n" \
- " .long 3b,30f\n" \
- " .endif\n" \
- " .endif\n" \
- " .previous\n" \
- "\n" \
- " .section .fixup,\"ax\"\n" \
- " .even\n" \
- "10: addq.l #"#n1",%0\n" \
- " .ifnc \""#s2"\",\"\"\n" \
- "20: addq.l #"#n2",%0\n" \
- " .ifnc \""#s3"\",\"\"\n" \
- "30: addq.l #"#n3",%0\n" \
- " .endif\n" \
- " .endif\n" \
- " jra 4b\n" \
- " .previous\n" \
- : "+d" (res), "+&a" (to), "+a" (from), "=&d" (tmp) \
- : : "memory")
-
-#define ___constant_copy_from_user_asm(res, to, from, tmp, n1, n2, n3, s1, s2, s3)\
- ____constant_copy_from_user_asm(res, to, from, tmp, n1, n2, n3, s1, s2, s3)
-#define __constant_copy_from_user_asm(res, to, from, tmp, n1, n2, n3) \
- ___constant_copy_from_user_asm(res, to, from, tmp, n1, n2, n3, \
- __suffix##n1, __suffix##n2, __suffix##n3)
-
-static __always_inline unsigned long
-__constant_copy_from_user(void *to, const void __user *from, unsigned long n)
-{
- unsigned long res = 0, tmp;
-
- switch (n) {
- case 1:
- __constant_copy_from_user_asm(res, to, from, tmp, 1, 0, 0);
- break;
- case 2:
- __constant_copy_from_user_asm(res, to, from, tmp, 2, 0, 0);
- break;
- case 3:
- __constant_copy_from_user_asm(res, to, from, tmp, 2, 1, 0);
- break;
- case 4:
- __constant_copy_from_user_asm(res, to, from, tmp, 4, 0, 0);
- break;
- case 5:
- __constant_copy_from_user_asm(res, to, from, tmp, 4, 1, 0);
- break;
- case 6:
- __constant_copy_from_user_asm(res, to, from, tmp, 4, 2, 0);
- break;
- case 7:
- __constant_copy_from_user_asm(res, to, from, tmp, 4, 2, 1);
- break;
- case 8:
- __constant_copy_from_user_asm(res, to, from, tmp, 4, 4, 0);
- break;
- case 9:
- __constant_copy_from_user_asm(res, to, from, tmp, 4, 4, 1);
- break;
- case 10:
- __constant_copy_from_user_asm(res, to, from, tmp, 4, 4, 2);
- break;
- case 12:
- __constant_copy_from_user_asm(res, to, from, tmp, 4, 4, 4);
- break;
- default:
- /* we limit the inlined version to 3 moves */
- return __generic_copy_from_user(to, from, n);
- }
-
- return res;
-}
-
-#define __constant_copy_to_user_asm(res, to, from, tmp, n, s1, s2, s3) \
- asm volatile ("\n" \
- " move."#s1" (%2)+,%3\n" \
- "11: "MOVES"."#s1" %3,(%1)+\n" \
- "12: move."#s2" (%2)+,%3\n" \
- "21: "MOVES"."#s2" %3,(%1)+\n" \
- "22:\n" \
- " .ifnc \""#s3"\",\"\"\n" \
- " move."#s3" (%2)+,%3\n" \
- "31: "MOVES"."#s3" %3,(%1)+\n" \
- "32:\n" \
- " .endif\n" \
- "4:\n" \
- "\n" \
- " .section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 11b,5f\n" \
- " .long 12b,5f\n" \
- " .long 21b,5f\n" \
- " .long 22b,5f\n" \
- " .ifnc \""#s3"\",\"\"\n" \
- " .long 31b,5f\n" \
- " .long 32b,5f\n" \
- " .endif\n" \
- " .previous\n" \
- "\n" \
- " .section .fixup,\"ax\"\n" \
- " .even\n" \
- "5: moveq.l #"#n",%0\n" \
- " jra 4b\n" \
- " .previous\n" \
- : "+d" (res), "+a" (to), "+a" (from), "=&d" (tmp) \
- : : "memory")
-
-static __always_inline unsigned long
-__constant_copy_to_user(void __user *to, const void *from, unsigned long n)
-{
- unsigned long res = 0, tmp;
-
- switch (n) {
- case 1:
- __put_user_asm(res, *(u8 *)from, (u8 __user *)to, b, d, 1);
- break;
- case 2:
- __put_user_asm(res, *(u16 *)from, (u16 __user *)to, w, r, 2);
- break;
- case 3:
- __constant_copy_to_user_asm(res, to, from, tmp, 3, w, b,);
- break;
- case 4:
- __put_user_asm(res, *(u32 *)from, (u32 __user *)to, l, r, 4);
- break;
- case 5:
- __constant_copy_to_user_asm(res, to, from, tmp, 5, l, b,);
- break;
- case 6:
- __constant_copy_to_user_asm(res, to, from, tmp, 6, l, w,);
- break;
- case 7:
- __constant_copy_to_user_asm(res, to, from, tmp, 7, l, w, b);
- break;
- case 8:
- __constant_copy_to_user_asm(res, to, from, tmp, 8, l, l,);
- break;
- case 9:
- __constant_copy_to_user_asm(res, to, from, tmp, 9, l, l, b);
- break;
- case 10:
- __constant_copy_to_user_asm(res, to, from, tmp, 10, l, l, w);
- break;
- case 12:
- __constant_copy_to_user_asm(res, to, from, tmp, 12, l, l, l);
- break;
- default:
- /* limit the inlined version to 3 moves */
- return __generic_copy_to_user(to, from, n);
- }
-
- return res;
-}
-
-static inline unsigned long
-raw_copy_from_user(void *to, const void __user *from, unsigned long n)
-{
- if (__builtin_constant_p(n))
- return __constant_copy_from_user(to, from, n);
- return __generic_copy_from_user(to, from, n);
-}
-
-static inline unsigned long
-raw_copy_to_user(void __user *to, const void *from, unsigned long n)
-{
- if (__builtin_constant_p(n))
- return __constant_copy_to_user(to, from, n);
- return __generic_copy_to_user(to, from, n);
-}
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
-
-#define user_addr_max() \
- (uaccess_kernel() ? ~0UL : TASK_SIZE)
-
-extern long strncpy_from_user(char *dst, const char __user *src, long count);
-extern __must_check long strnlen_user(const char __user *str, long n);
-
-unsigned long __clear_user(void __user *to, unsigned long n);
-
-#define clear_user __clear_user
-
-#endif /* _M68K_UACCESS_H */
diff --git a/arch/m68k/include/asm/uaccess_no.h b/arch/m68k/include/asm/uaccess_no.h
deleted file mode 100644
index dcfb69361408..000000000000
--- a/arch/m68k/include/asm/uaccess_no.h
+++ /dev/null
@@ -1,160 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __M68KNOMMU_UACCESS_H
-#define __M68KNOMMU_UACCESS_H
-
-/*
- * User space memory access functions
- */
-#include <linux/string.h>
-
-#include <asm/segment.h>
-
-#define access_ok(addr,size) _access_ok((unsigned long)(addr),(size))
-
-/*
- * It is not enough to just have access_ok check for a real RAM address.
- * This would disallow the case of code/ro-data running XIP in flash/rom.
- * Ideally we would check the possible flash ranges too, but that is
- * currently not so easy.
- */
-static inline int _access_ok(unsigned long addr, unsigned long size)
-{
- return 1;
-}
-
-/*
- * These are the main single-value transfer routines. They automatically
- * use the right size if we just have the right pointer type.
- */
-
-#define put_user(x, ptr) \
-({ \
- int __pu_err = 0; \
- typeof(*(ptr)) __pu_val = (x); \
- switch (sizeof (*(ptr))) { \
- case 1: \
- __put_user_asm(__pu_err, __pu_val, ptr, b); \
- break; \
- case 2: \
- __put_user_asm(__pu_err, __pu_val, ptr, w); \
- break; \
- case 4: \
- __put_user_asm(__pu_err, __pu_val, ptr, l); \
- break; \
- case 8: \
- memcpy((void __force *)ptr, &__pu_val, sizeof(*(ptr))); \
- break; \
- default: \
- __pu_err = __put_user_bad(); \
- break; \
- } \
- __pu_err; \
-})
-#define __put_user(x, ptr) put_user(x, ptr)
-
-extern int __put_user_bad(void);
-
-/*
- * Tell gcc we read from memory instead of writing: this is because
- * we do not write to any memory gcc knows about, so there are no
- * aliasing issues.
- */
-
-#define __ptr(x) ((unsigned long __user *)(x))
-
-#define __put_user_asm(err,x,ptr,bwl) \
- __asm__ ("move" #bwl " %0,%1" \
- : /* no outputs */ \
- :"d" (x),"m" (*__ptr(ptr)) : "memory")
-
-#define get_user(x, ptr) \
-({ \
- int __gu_err = 0; \
- switch (sizeof(*(ptr))) { \
- case 1: \
- __get_user_asm(__gu_err, x, ptr, b, "=d"); \
- break; \
- case 2: \
- __get_user_asm(__gu_err, x, ptr, w, "=r"); \
- break; \
- case 4: \
- __get_user_asm(__gu_err, x, ptr, l, "=r"); \
- break; \
- case 8: { \
- union { \
- u64 l; \
- __typeof__(*(ptr)) t; \
- } __gu_val; \
- memcpy(&__gu_val.l, (const void __force *)ptr, sizeof(__gu_val.l)); \
- (x) = __gu_val.t; \
- break; \
- } \
- default: \
- __gu_err = __get_user_bad(); \
- break; \
- } \
- __gu_err; \
-})
-#define __get_user(x, ptr) get_user(x, ptr)
-
-extern int __get_user_bad(void);
-
-#define __get_user_asm(err,x,ptr,bwl,reg) \
- __asm__ ("move" #bwl " %1,%0" \
- : "=d" (x) \
- : "m" (*__ptr(ptr)))
-
-static inline unsigned long
-raw_copy_from_user(void *to, const void __user *from, unsigned long n)
-{
- memcpy(to, (__force const void *)from, n);
- return 0;
-}
-
-static inline unsigned long
-raw_copy_to_user(void __user *to, const void *from, unsigned long n)
-{
- memcpy((__force void *)to, from, n);
- return 0;
-}
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
-
-/*
- * Copy a null terminated string from userspace.
- */
-
-static inline long
-strncpy_from_user(char *dst, const char *src, long count)
-{
- char *tmp;
- strncpy(dst, src, count);
- for (tmp = dst; *tmp && count > 0; tmp++, count--)
- ;
- return(tmp - dst); /* DAVIDM should we count a NUL ? check getname */
-}
-
-/*
- * Return the size of a string (including the ending 0)
- *
- * Return 0 on exception, a value greater than N if too long
- */
-static inline long strnlen_user(const char *src, long n)
-{
- return(strlen(src) + 1); /* DAVIDM make safer */
-}
-
-/*
- * Zero Userspace
- */
-
-static inline unsigned long
-__clear_user(void *to, unsigned long n)
-{
- memset(to, 0, n);
- return 0;
-}
-
-#define clear_user(to,n) __clear_user(to,n)
-
-#endif /* _M68KNOMMU_UACCESS_H */
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index a98fca977073..a85f59bc1c35 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -920,7 +920,8 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
err |= __put_user(0x70004e40 + (__NR_sigreturn << 16),
(long __user *)(frame->retcode));
#else
- err |= __put_user((void *) ret_from_user_signal, &frame->pretcode);
+ err |= __put_user((long) ret_from_user_signal,
+ (long __user *) &frame->pretcode);
#endif
if (err)
@@ -1004,7 +1005,8 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
err |= __put_user(0x4e40, (short __user *)(frame->retcode + 4));
#endif
#else
- err |= __put_user((void *) ret_from_user_rt_signal, &frame->pretcode);
+ err |= __put_user((long) ret_from_user_rt_signal,
+ (long __user *) &frame->pretcode);
#endif /* CONFIG_MMU */
if (err)
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 81fc799d8392..625fb6d32842 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -439,3 +439,4 @@
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
+440 common process_madvise sys_process_madvise
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index b4e263916f41..aae729c95cf9 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -445,3 +445,4 @@
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
+440 common process_madvise sys_process_madvise
diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms
index a13c4cf6e608..5483e38b5dc7 100644
--- a/arch/mips/Kbuild.platforms
+++ b/arch/mips/Kbuild.platforms
@@ -13,7 +13,6 @@ platform-$(CONFIG_MIPS_COBALT) += cobalt/
platform-$(CONFIG_MACH_DECSTATION) += dec/
platform-$(CONFIG_MIPS_GENERIC) += generic/
platform-$(CONFIG_MACH_JAZZ) += jazz/
-platform-$(CONFIG_MACH_INGENIC) += jz4740/
platform-$(CONFIG_LANTIQ) += lantiq/
platform-$(CONFIG_MACH_LOONGSON2EF) += loongson2ef/
platform-$(CONFIG_MACH_LOONGSON32) += loongson32/
@@ -22,7 +21,6 @@ platform-$(CONFIG_MIPS_MALTA) += mti-malta/
platform-$(CONFIG_NLM_COMMON) += netlogic/
platform-$(CONFIG_PIC32MZDA) += pic32/
platform-$(CONFIG_MACH_PISTACHIO) += pistachio/
-platform-$(CONFIG_SOC_PNX833X) += pnx833x/
platform-$(CONFIG_RALINK) += ralink/
platform-$(CONFIG_MIKROTIK_RB532) += rb532/
platform-$(CONFIG_SGI_IP22) += sgi-ip22/
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index c695d103bf6d..bc04cf000e94 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -94,14 +94,34 @@ config MIPS
config MIPS_FIXUP_BIGPHYS_ADDR
bool
+config MIPS_GENERIC
+ bool
+
+config MACH_INGENIC
+ bool
+ select SYS_SUPPORTS_32BIT_KERNEL
+ select SYS_SUPPORTS_LITTLE_ENDIAN
+ select SYS_SUPPORTS_ZBOOT
+ select DMA_NONCOHERENT
+ select IRQ_MIPS_CPU
+ select PINCTRL
+ select GPIOLIB
+ select COMMON_CLK
+ select GENERIC_IRQ_CHIP
+ select BUILTIN_DTB if MIPS_NO_APPENDED_DTB
+ select USE_OF
+ select CPU_SUPPORTS_CPUFREQ
+ select MIPS_EXTERNAL_TIMER
+
menu "Machine selection"
choice
prompt "System type"
- default MIPS_GENERIC
+ default MIPS_GENERIC_KERNEL
-config MIPS_GENERIC
+config MIPS_GENERIC_KERNEL
bool "Generic board-agnostic MIPS kernel"
+ select MIPS_GENERIC
select BOOT_RAW
select BUILTIN_DTB
select CEVT_R4K
@@ -138,6 +158,7 @@ config MIPS_GENERIC
select SYS_SUPPORTS_MULTITHREADING
select SYS_SUPPORTS_RELOCATABLE
select SYS_SUPPORTS_SMARTMIPS
+ select SYS_SUPPORTS_ZBOOT
select UHI_BOOT
select USB_EHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
@@ -390,20 +411,11 @@ config MACH_JAZZ
Members include the Acer PICA, MIPS Magnum 4000, MIPS Millennium and
Olivetti M700-10 workstations.
-config MACH_INGENIC
+config MACH_INGENIC_SOC
bool "Ingenic SoC based machines"
- select SYS_SUPPORTS_32BIT_KERNEL
- select SYS_SUPPORTS_LITTLE_ENDIAN
+ select MIPS_GENERIC
+ select MACH_INGENIC
select SYS_SUPPORTS_ZBOOT_UART16550
- select CPU_SUPPORTS_HUGEPAGES
- select DMA_NONCOHERENT
- select IRQ_MIPS_CPU
- select PINCTRL
- select GPIOLIB
- select COMMON_CLK
- select GENERIC_IRQ_CHIP
- select BUILTIN_DTB if MIPS_NO_APPENDED_DTB
- select USE_OF
config LANTIQ
bool "Lantiq based platforms"
@@ -476,6 +488,7 @@ config MACH_LOONGSON64
select SYS_SUPPORTS_ZBOOT
select ZONE_DMA32
select NUMA
+ select SMP
select COMMON_CLK
select USE_OF
select BUILTIN_DTB
@@ -569,6 +582,7 @@ config MIPS_MALTA
select SYS_SUPPORTS_VPE_LOADER
select SYS_SUPPORTS_ZBOOT
select USE_OF
+ select WAR_ICACHE_REFILLS
select ZONE_DMA32 if 64BIT
help
This enables support for the MIPS Technologies Malta evaluation
@@ -590,19 +604,6 @@ config MACH_VR41XX
select SYS_SUPPORTS_MIPS16
select GPIOLIB
-config NXP_STB220
- bool "NXP STB220 board"
- select SOC_PNX833X
- help
- Support for NXP Semiconductors STB220 Development Board.
-
-config NXP_STB225
- bool "NXP 225 board"
- select SOC_PNX833X
- select SOC_PNX8335
- help
- Support for NXP Semiconductors STB225 Development Board.
-
config RALINK
bool "Ralink based machines"
select CEVT_R4K
@@ -616,6 +617,7 @@ config RALINK
select SYS_SUPPORTS_32BIT_KERNEL
select SYS_SUPPORTS_LITTLE_ENDIAN
select SYS_SUPPORTS_MIPS16
+ select SYS_SUPPORTS_ZBOOT
select SYS_HAS_EARLY_PRINTK
select CLKDEV_LOOKUP
select ARCH_HAS_RESET_CONTROLLER
@@ -652,6 +654,9 @@ config SGI_IP22
select SYS_SUPPORTS_32BIT_KERNEL
select SYS_SUPPORTS_64BIT_KERNEL
select SYS_SUPPORTS_BIG_ENDIAN
+ select WAR_R4600_V1_INDEX_ICACHEOP
+ select WAR_R4600_V1_HIT_CACHEOP
+ select WAR_R4600_V2_HIT_CACHEOP
select MIPS_L1_CACHE_SHIFT_7
help
This are the SGI Indy, Challenge S and Indigo2, as well as certain
@@ -679,6 +684,7 @@ config SGI_IP27
select SYS_SUPPORTS_BIG_ENDIAN
select SYS_SUPPORTS_NUMA
select SYS_SUPPORTS_SMP
+ select WAR_R10000_LLSC
select MIPS_L1_CACHE_SHIFT_7
select NUMA
help
@@ -714,6 +720,7 @@ config SGI_IP28
select SYS_HAS_EARLY_PRINTK
select SYS_SUPPORTS_64BIT_KERNEL
select SYS_SUPPORTS_BIG_ENDIAN
+ select WAR_R10000_LLSC
select MIPS_L1_CACHE_SHIFT_7
help
This is the SGI Indigo2 with R10000 processor. To compile a Linux
@@ -740,6 +747,7 @@ config SGI_IP30
select SYS_SUPPORTS_64BIT_KERNEL
select SYS_SUPPORTS_BIG_ENDIAN
select SYS_SUPPORTS_SMP
+ select WAR_R10000_LLSC
select MIPS_L1_CACHE_SHIFT_7
select ARC_MEMORY
help
@@ -767,6 +775,7 @@ config SGI_IP32
select SYS_HAS_CPU_NEVADA
select SYS_SUPPORTS_64BIT_KERNEL
select SYS_SUPPORTS_BIG_ENDIAN
+ select WAR_ICACHE_REFILLS
help
If you want this kernel to run on SGI O2 workstation, say Y here.
@@ -890,6 +899,7 @@ config SNI_RM
select SYS_SUPPORTS_BIG_ENDIAN
select SYS_SUPPORTS_HIGHMEM
select SYS_SUPPORTS_LITTLE_ENDIAN
+ select WAR_R4600_V2_HIT_CACHEOP
help
The SNI RM200/300/400 are MIPS-based machines manufactured by
Siemens Nixdorf Informationssysteme (SNI), parent company of Pyramid
@@ -901,6 +911,7 @@ config MACH_TX39XX
config MACH_TX49XX
bool "Toshiba TX49 series based machines"
+ select WAR_TX49XX_ICACHE_INDEX_INV
config MIKROTIK_RB532
bool "Mikrotik RB532 boards"
@@ -1026,8 +1037,8 @@ source "arch/mips/bcm47xx/Kconfig"
source "arch/mips/bcm63xx/Kconfig"
source "arch/mips/bmips/Kconfig"
source "arch/mips/generic/Kconfig"
+source "arch/mips/ingenic/Kconfig"
source "arch/mips/jazz/Kconfig"
-source "arch/mips/jz4740/Kconfig"
source "arch/mips/lantiq/Kconfig"
source "arch/mips/pic32/Kconfig"
source "arch/mips/pistachio/Kconfig"
@@ -1267,23 +1278,6 @@ config PCI_XTALK_BRIDGE
config NO_EXCEPT_FILL
bool
-config SOC_PNX833X
- bool
- select CEVT_R4K
- select CSRC_R4K
- select IRQ_MIPS_CPU
- select DMA_NONCOHERENT
- select SYS_HAS_CPU_MIPS32_R2
- select SYS_SUPPORTS_32BIT_KERNEL
- select SYS_SUPPORTS_LITTLE_ENDIAN
- select SYS_SUPPORTS_BIG_ENDIAN
- select SYS_SUPPORTS_MIPS16
- select CPU_MIPSR2_IRQ_VI
-
-config SOC_PNX8335
- bool
- select SOC_PNX833X
-
config MIPS_SPRAM
bool
@@ -1620,7 +1614,6 @@ config CPU_P5600
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
select CPU_SUPPORTS_MSA
- select CPU_SUPPORTS_UNCACHED_ACCELERATED
select CPU_SUPPORTS_CPUFREQ
select CPU_MIPSR2_IRQ_VI
select CPU_MIPSR2_IRQ_EI
@@ -1891,6 +1884,7 @@ config SYS_SUPPORTS_ZBOOT
select HAVE_KERNEL_LZMA
select HAVE_KERNEL_LZO
select HAVE_KERNEL_XZ
+ select HAVE_KERNEL_ZSTD
config SYS_SUPPORTS_ZBOOT_UART16550
bool
@@ -2272,7 +2266,7 @@ config FORCE_MAX_ZONEORDER
default "13" if MIPS_HUGE_TLB_SUPPORT && PAGE_SIZE_32KB
range 12 64 if MIPS_HUGE_TLB_SUPPORT && PAGE_SIZE_16KB
default "12" if MIPS_HUGE_TLB_SUPPORT && PAGE_SIZE_16KB
- range 11 64
+ range 0 64
default "11"
help
The kernel memory allocator divides physically contiguous memory
@@ -2638,6 +2632,76 @@ config MIPS_ASID_BITS_VARIABLE
config MIPS_CRC_SUPPORT
bool
+# R4600 erratum. Due to the lack of errata information the exact
+# technical details aren't known. I've experimentally found that disabling
+# interrupts during indexed I-cache flushes seems to be sufficient to deal
+# with the issue.
+config WAR_R4600_V1_INDEX_ICACHEOP
+ bool
+
+# Pleasures of the R4600 V1.x. Cite from the IDT R4600 V1.7 errata:
+#
+# 18. The CACHE instructions Hit_Writeback_Invalidate_D, Hit_Writeback_D,
+# Hit_Invalidate_D and Create_Dirty_Excl_D should only be
+# executed if there is no other dcache activity. If the dcache is
+# accessed for another instruction immeidately preceding when these
+# cache instructions are executing, it is possible that the dcache
+# tag match outputs used by these cache instructions will be
+# incorrect. These cache instructions should be preceded by at least
+# four instructions that are not any kind of load or store
+# instruction.
+#
+# This is not allowed: lw
+# nop
+# nop
+# nop
+# cache Hit_Writeback_Invalidate_D
+#
+# This is allowed: lw
+# nop
+# nop
+# nop
+# nop
+# cache Hit_Writeback_Invalidate_D
+config WAR_R4600_V1_HIT_CACHEOP
+ bool
+
+# Writeback and invalidate the primary cache dcache before DMA.
+#
+# R4600 v2.0 bug: "The CACHE instructions Hit_Writeback_Inv_D,
+# Hit_Writeback_D, Hit_Invalidate_D and Create_Dirty_Exclusive_D will only
+# operate correctly if the internal data cache refill buffer is empty. These
+# CACHE instructions should be separated from any potential data cache miss
+# by a load instruction to an uncached address to empty the response buffer."
+# (Revision 2.0 device errata from IDT available on https://www.idt.com/
+# in .pdf format.)
+config WAR_R4600_V2_HIT_CACHEOP
+ bool
+
+# From TX49/H2 manual: "If the instruction (i.e. CACHE) is issued for
+# the line which this instruction itself exists, the following
+# operation is not guaranteed."
+#
+# Workaround: do two phase flushing for Index_Invalidate_I
+config WAR_TX49XX_ICACHE_INDEX_INV
+ bool
+
+# The RM7000 processors and the E9000 cores have a bug (though PMC-Sierra
+# opposes it being called that) where invalid instructions in the same
+# I-cache line worth of instructions being fetched may case spurious
+# exceptions.
+config WAR_ICACHE_REFILLS
+ bool
+
+# On the R10000 up to version 2.6 (not sure about 2.7) there is a bug that
+# may cause ll / sc and lld / scd sequences to execute non-atomically.
+config WAR_R10000_LLSC
+ bool
+
+# 34K core erratum: "Problems Executing the TLBR Instruction"
+config WAR_MIPS34K_MISSED_ITLB
+ bool
+
#
# - Highmem only makes sense for the 32-bit kernel.
# - The current highmem code will only work properly on physically indexed
diff --git a/arch/mips/alchemy/Kconfig b/arch/mips/alchemy/Kconfig
index 83b288b95b16..69734120ada1 100644
--- a/arch/mips/alchemy/Kconfig
+++ b/arch/mips/alchemy/Kconfig
@@ -1,12 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-# au1000-style gpio and interrupt controllers
-config ALCHEMY_GPIOINT_AU1000
- bool
-
-# au1300-style GPIO/INT controller
-config ALCHEMY_GPIOINT_AU1300
- bool
-
choice
prompt "Machine type"
depends on MIPS_ALCHEMY
@@ -15,7 +7,6 @@ choice
config MIPS_MTX1
bool "4G Systems MTX-1 board"
select HAVE_PCI
- select ALCHEMY_GPIOINT_AU1000
select SYS_SUPPORTS_LITTLE_ENDIAN
select SYS_HAS_EARLY_PRINTK
@@ -33,13 +24,11 @@ config MIPS_DB1XXX
config MIPS_XXS1500
bool "MyCable XXS1500 board"
- select ALCHEMY_GPIOINT_AU1000
select SYS_SUPPORTS_LITTLE_ENDIAN
select SYS_HAS_EARLY_PRINTK
config MIPS_GPR
bool "Trapeze ITS GPR board"
- select ALCHEMY_GPIOINT_AU1000
select HAVE_PCI
select SYS_SUPPORTS_LITTLE_ENDIAN
select SYS_HAS_EARLY_PRINTK
diff --git a/arch/mips/alchemy/board-gpr.c b/arch/mips/alchemy/board-gpr.c
index 6c47318946e4..f587c40b6d00 100644
--- a/arch/mips/alchemy/board-gpr.c
+++ b/arch/mips/alchemy/board-gpr.c
@@ -31,23 +31,6 @@ const char *get_system_type(void)
return "GPR";
}
-void __init prom_init(void)
-{
- unsigned char *memsize_str;
- unsigned long memsize;
-
- prom_argc = fw_arg0;
- prom_argv = (char **)fw_arg1;
- prom_envp = (char **)fw_arg2;
-
- prom_init_cmdline();
-
- memsize_str = prom_getenv("memsize");
- if (!memsize_str || kstrtoul(memsize_str, 0, &memsize))
- memsize = 0x04000000;
- add_memory_region(0, memsize, BOOT_MEM_RAM);
-}
-
void prom_putchar(char c)
{
alchemy_uart_putchar(AU1000_UART0_PHYS_ADDR, c);
diff --git a/arch/mips/alchemy/board-mtx1.c b/arch/mips/alchemy/board-mtx1.c
index 23093535399f..68ea57511629 100644
--- a/arch/mips/alchemy/board-mtx1.c
+++ b/arch/mips/alchemy/board-mtx1.c
@@ -30,23 +30,6 @@ const char *get_system_type(void)
return "MTX-1";
}
-void __init prom_init(void)
-{
- unsigned char *memsize_str;
- unsigned long memsize;
-
- prom_argc = fw_arg0;
- prom_argv = (char **)fw_arg1;
- prom_envp = (char **)fw_arg2;
-
- prom_init_cmdline();
-
- memsize_str = prom_getenv("memsize");
- if (!memsize_str || kstrtoul(memsize_str, 0, &memsize))
- memsize = 0x04000000;
- add_memory_region(0, memsize, BOOT_MEM_RAM);
-}
-
void prom_putchar(char c)
{
alchemy_uart_putchar(AU1000_UART0_PHYS_ADDR, c);
diff --git a/arch/mips/alchemy/board-xxs1500.c b/arch/mips/alchemy/board-xxs1500.c
index c67dfe1f4997..b184baa4e56a 100644
--- a/arch/mips/alchemy/board-xxs1500.c
+++ b/arch/mips/alchemy/board-xxs1500.c
@@ -25,24 +25,6 @@ const char *get_system_type(void)
return "XXS1500";
}
-void __init prom_init(void)
-{
- unsigned char *memsize_str;
- unsigned long memsize;
-
- prom_argc = fw_arg0;
- prom_argv = (char **)fw_arg1;
- prom_envp = (char **)fw_arg2;
-
- prom_init_cmdline();
-
- memsize_str = prom_getenv("memsize");
- if (!memsize_str || kstrtoul(memsize_str, 0, &memsize))
- memsize = 0x04000000;
-
- add_memory_region(0, memsize, BOOT_MEM_RAM);
-}
-
void prom_putchar(char c)
{
alchemy_uart_putchar(AU1000_UART0_PHYS_ADDR, c);
diff --git a/arch/mips/alchemy/common/prom.c b/arch/mips/alchemy/common/prom.c
index af312b5e33f6..d910c0a64de9 100644
--- a/arch/mips/alchemy/common/prom.c
+++ b/arch/mips/alchemy/common/prom.c
@@ -34,6 +34,9 @@
*/
#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+#include <linux/sizes.h>
#include <linux/string.h>
#include <asm/bootinfo.h>
@@ -76,6 +79,24 @@ char *prom_getenv(char *envname)
return NULL;
}
+void __init prom_init(void)
+{
+ unsigned char *memsize_str;
+ unsigned long memsize;
+
+ prom_argc = (int)fw_arg0;
+ prom_argv = (char **)fw_arg1;
+ prom_envp = (char **)fw_arg2;
+
+ prom_init_cmdline();
+
+ memsize_str = prom_getenv("memsize");
+ if (!memsize_str || kstrtoul(memsize_str, 0, &memsize))
+ memsize = SZ_64M; /* minimum memsize is 64MB RAM */
+
+ memblock_add(0, memsize);
+}
+
static inline unsigned char str2hexnum(unsigned char c)
{
if (c >= '0' && c <= '9')
diff --git a/arch/mips/alchemy/devboards/db1300.c b/arch/mips/alchemy/devboards/db1300.c
index 8ac1f56ee57d..cd72eaa1168f 100644
--- a/arch/mips/alchemy/devboards/db1300.c
+++ b/arch/mips/alchemy/devboards/db1300.c
@@ -731,6 +731,7 @@ static struct platform_device db1300_lcd_dev = {
/**********************************************************************/
+#if IS_ENABLED(CONFIG_TOUCHSCREEN_WM97XX)
static void db1300_wm97xx_irqen(struct wm97xx *wm, int enable)
{
if (enable)
@@ -762,6 +763,12 @@ static int db1300_wm97xx_probe(struct platform_device *pdev)
return wm97xx_register_mach_ops(wm, &db1300_wm97xx_ops);
}
+#else
+static int db1300_wm97xx_probe(struct platform_device *pdev)
+{
+ return -ENODEV;
+}
+#endif
static struct platform_driver db1300_wm97xx_driver = {
.driver.name = "wm97xx-touch",
diff --git a/arch/mips/alchemy/devboards/platform.c b/arch/mips/alchemy/devboards/platform.c
index 8d4b65c3268a..754bdd2ca630 100644
--- a/arch/mips/alchemy/devboards/platform.c
+++ b/arch/mips/alchemy/devboards/platform.c
@@ -20,23 +20,6 @@
#include <prom.h>
-void __init prom_init(void)
-{
- unsigned char *memsize_str;
- unsigned long memsize;
-
- prom_argc = (int)fw_arg0;
- prom_argv = (char **)fw_arg1;
- prom_envp = (char **)fw_arg2;
-
- prom_init_cmdline();
- memsize_str = prom_getenv("memsize");
- if (!memsize_str || kstrtoul(memsize_str, 0, &memsize))
- memsize = 64 << 20; /* all devboards have at least 64MB RAM */
-
- add_memory_region(0, memsize, BOOT_MEM_RAM);
-}
-
void prom_putchar(char c)
{
if (alchemy_get_cputype() == ALCHEMY_CPU_AU1300)
diff --git a/arch/mips/ar7/memory.c b/arch/mips/ar7/memory.c
index ad6efb36ebfe..787716c5e946 100644
--- a/arch/mips/ar7/memory.c
+++ b/arch/mips/ar7/memory.c
@@ -47,7 +47,7 @@ void __init prom_meminit(void)
unsigned long pages;
pages = memsize() >> PAGE_SHIFT;
- add_memory_region(PHYS_OFFSET, pages << PAGE_SHIFT, BOOT_MEM_RAM);
+ memblock_add(PHYS_OFFSET, pages << PAGE_SHIFT);
}
void __init prom_free_prom_memory(void)
diff --git a/arch/mips/ath25/ar2315.c b/arch/mips/ath25/ar2315.c
index e7b53e3960c8..9dbed7b5ea76 100644
--- a/arch/mips/ath25/ar2315.c
+++ b/arch/mips/ath25/ar2315.c
@@ -19,6 +19,7 @@
#include <linux/bitops.h>
#include <linux/irqdomain.h>
#include <linux/interrupt.h>
+#include <linux/memblock.h>
#include <linux/platform_device.h>
#include <linux/reboot.h>
#include <asm/bootinfo.h>
@@ -266,7 +267,7 @@ void __init ar2315_plat_mem_setup(void)
memsize <<= 1 + ATH25_REG_MS(memcfg, AR2315_MEM_CFG_COL_WIDTH);
memsize <<= 1 + ATH25_REG_MS(memcfg, AR2315_MEM_CFG_ROW_WIDTH);
memsize <<= 3;
- add_memory_region(0, memsize, BOOT_MEM_RAM);
+ memblock_add(0, memsize);
iounmap(sdram_base);
ar2315_rst_base = ioremap(AR2315_RST_BASE, AR2315_RST_SIZE);
diff --git a/arch/mips/ath25/ar5312.c b/arch/mips/ath25/ar5312.c
index 42bf2afb4765..23c879f4b734 100644
--- a/arch/mips/ath25/ar5312.c
+++ b/arch/mips/ath25/ar5312.c
@@ -19,6 +19,7 @@
#include <linux/bitops.h>
#include <linux/irqdomain.h>
#include <linux/interrupt.h>
+#include <linux/memblock.h>
#include <linux/platform_device.h>
#include <linux/mtd/physmap.h>
#include <linux/reboot.h>
@@ -363,7 +364,7 @@ void __init ar5312_plat_mem_setup(void)
memsize = (bank0_ac ? (1 << (bank0_ac + 1)) : 0) +
(bank1_ac ? (1 << (bank1_ac + 1)) : 0);
memsize <<= 20;
- add_memory_region(0, memsize, BOOT_MEM_RAM);
+ memblock_add(0, memsize);
iounmap(sdram_base);
ar5312_rst_base = ioremap(AR5312_RST_BASE, AR5312_RST_SIZE);
diff --git a/arch/mips/bcm47xx/prom.c b/arch/mips/bcm47xx/prom.c
index 135a5407f015..3e2a8166377f 100644
--- a/arch/mips/bcm47xx/prom.c
+++ b/arch/mips/bcm47xx/prom.c
@@ -27,6 +27,7 @@
#include <linux/init.h>
#include <linux/types.h>
#include <linux/kernel.h>
+#include <linux/memblock.h>
#include <linux/spinlock.h>
#include <linux/ssb/ssb_driver_chipcommon.h>
#include <linux/ssb/ssb_regs.h>
@@ -97,7 +98,7 @@ static __init void prom_init_mem(void)
*/
if (c->cputype == CPU_74K && (mem == (128 << 20)))
mem -= 0x1000;
- add_memory_region(0, mem, BOOT_MEM_RAM);
+ memblock_add(0, mem);
}
/*
diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c
index 01427bde2397..94bf839576c1 100644
--- a/arch/mips/bcm47xx/setup.c
+++ b/arch/mips/bcm47xx/setup.c
@@ -141,7 +141,7 @@ static void __init bcm47xx_register_bcma(void)
/*
* Memory setup is done in the early part of MIPS's arch_mem_init. It's supposed
- * to detect memory and record it with add_memory_region.
+ * to detect memory and record it with memblock_add.
* Any extra initializaion performed here must not use kmalloc or bootmem.
*/
void __init plat_mem_setup(void)
diff --git a/arch/mips/bcm63xx/boards/board_bcm963xx.c b/arch/mips/bcm63xx/boards/board_bcm963xx.c
index 230bf27c1fb8..01aff80a5967 100644
--- a/arch/mips/bcm63xx/boards/board_bcm963xx.c
+++ b/arch/mips/bcm63xx/boards/board_bcm963xx.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
* Copyright (C) 2008 Maxime Bizon <mbizon@freebox.fr>
* Copyright (C) 2008 Florian Fainelli <florian@openwrt.org>
*/
@@ -32,7 +29,6 @@
#include <uapi/linux/bcm933xx_hcs.h>
-
#define HCS_OFFSET_128K 0x20000
static struct board_info board;
@@ -42,30 +38,28 @@ static struct board_info board;
*/
#ifdef CONFIG_BCM63XX_CPU_3368
static struct board_info __initdata board_cvg834g = {
- .name = "CVG834G_E15R3921",
- .expected_cpu_id = 0x3368,
-
- .has_uart0 = 1,
- .has_uart1 = 1,
+ .name = "CVG834G_E15R3921",
+ .expected_cpu_id = 0x3368,
- .has_enet0 = 1,
- .has_pci = 1,
+ .ephy_reset_gpio = 36,
+ .ephy_reset_gpio_flags = GPIOF_INIT_HIGH,
+ .has_pci = 1,
+ .has_uart0 = 1,
+ .has_uart1 = 1,
+ .has_enet0 = 1,
.enet0 = {
- .has_phy = 1,
- .use_internal_phy = 1,
+ .has_phy = 1,
+ .use_internal_phy = 1,
},
.leds = {
{
- .name = "CVG834G:green:power",
- .gpio = 37,
+ .name = "CVG834G:green:power",
+ .gpio = 37,
.default_trigger= "default-on",
},
},
-
- .ephy_reset_gpio = 36,
- .ephy_reset_gpio_flags = GPIOF_INIT_HIGH,
};
#endif /* CONFIG_BCM63XX_CPU_3368 */
@@ -74,44 +68,44 @@ static struct board_info __initdata board_cvg834g = {
*/
#ifdef CONFIG_BCM63XX_CPU_6328
static struct board_info __initdata board_96328avng = {
- .name = "96328avng",
- .expected_cpu_id = 0x6328,
+ .name = "96328avng",
+ .expected_cpu_id = 0x6328,
- .has_uart0 = 1,
- .has_pci = 1,
- .has_usbd = 0,
+ .has_pci = 1,
+ .has_uart0 = 1,
+ .has_usbd = 0,
.usbd = {
- .use_fullspeed = 0,
- .port_no = 0,
+ .use_fullspeed = 0,
+ .port_no = 0,
},
.leds = {
{
- .name = "96328avng::ppp-fail",
- .gpio = 2,
- .active_low = 1,
+ .name = "96328avng::ppp-fail",
+ .gpio = 2,
+ .active_low = 1,
},
{
- .name = "96328avng::power",
- .gpio = 4,
- .active_low = 1,
+ .name = "96328avng::power",
+ .gpio = 4,
+ .active_low = 1,
.default_trigger = "default-on",
},
{
- .name = "96328avng::power-fail",
- .gpio = 8,
- .active_low = 1,
+ .name = "96328avng::power-fail",
+ .gpio = 8,
+ .active_low = 1,
},
{
- .name = "96328avng::wps",
- .gpio = 9,
- .active_low = 1,
+ .name = "96328avng::wps",
+ .gpio = 9,
+ .active_low = 1,
},
{
- .name = "96328avng::ppp",
- .gpio = 11,
- .active_low = 1,
+ .name = "96328avng::ppp",
+ .gpio = 11,
+ .active_low = 1,
},
},
};
@@ -122,85 +116,86 @@ static struct board_info __initdata board_96328avng = {
*/
#ifdef CONFIG_BCM63XX_CPU_6338
static struct board_info __initdata board_96338gw = {
- .name = "96338GW",
- .expected_cpu_id = 0x6338,
+ .name = "96338GW",
+ .expected_cpu_id = 0x6338,
- .has_uart0 = 1,
- .has_enet0 = 1,
+ .has_ohci0 = 1,
+ .has_uart0 = 1,
+
+ .has_enet0 = 1,
.enet0 = {
- .force_speed_100 = 1,
- .force_duplex_full = 1,
+ .force_speed_100 = 1,
+ .force_duplex_full = 1,
},
- .has_ohci0 = 1,
-
.leds = {
{
- .name = "adsl",
- .gpio = 3,
- .active_low = 1,
+ .name = "adsl",
+ .gpio = 3,
+ .active_low = 1,
},
{
- .name = "ses",
- .gpio = 5,
- .active_low = 1,
+ .name = "ses",
+ .gpio = 5,
+ .active_low = 1,
},
{
- .name = "ppp-fail",
- .gpio = 4,
- .active_low = 1,
+ .name = "ppp-fail",
+ .gpio = 4,
+ .active_low = 1,
},
{
- .name = "power",
- .gpio = 0,
- .active_low = 1,
+ .name = "power",
+ .gpio = 0,
+ .active_low = 1,
.default_trigger = "default-on",
},
{
- .name = "stop",
- .gpio = 1,
- .active_low = 1,
+ .name = "stop",
+ .gpio = 1,
+ .active_low = 1,
}
},
};
static struct board_info __initdata board_96338w = {
- .name = "96338W",
- .expected_cpu_id = 0x6338,
+ .name = "96338W",
+ .expected_cpu_id = 0x6338,
+
+ .has_uart0 = 1,
- .has_uart0 = 1,
- .has_enet0 = 1,
+ .has_enet0 = 1,
.enet0 = {
- .force_speed_100 = 1,
- .force_duplex_full = 1,
+ .force_speed_100 = 1,
+ .force_duplex_full = 1,
},
.leds = {
{
- .name = "adsl",
- .gpio = 3,
- .active_low = 1,
+ .name = "adsl",
+ .gpio = 3,
+ .active_low = 1,
},
{
- .name = "ses",
- .gpio = 5,
- .active_low = 1,
+ .name = "ses",
+ .gpio = 5,
+ .active_low = 1,
},
{
- .name = "ppp-fail",
- .gpio = 4,
- .active_low = 1,
+ .name = "ppp-fail",
+ .gpio = 4,
+ .active_low = 1,
},
{
- .name = "power",
- .gpio = 0,
- .active_low = 1,
+ .name = "power",
+ .gpio = 0,
+ .active_low = 1,
.default_trigger = "default-on",
},
{
- .name = "stop",
- .gpio = 1,
- .active_low = 1,
+ .name = "stop",
+ .gpio = 1,
+ .active_low = 1,
},
},
};
@@ -211,10 +206,10 @@ static struct board_info __initdata board_96338w = {
*/
#ifdef CONFIG_BCM63XX_CPU_6345
static struct board_info __initdata board_96345gw2 = {
- .name = "96345GW2",
- .expected_cpu_id = 0x6345,
+ .name = "96345GW2",
+ .expected_cpu_id = 0x6345,
- .has_uart0 = 1,
+ .has_uart0 = 1,
};
#endif /* CONFIG_BCM63XX_CPU_6345 */
@@ -223,286 +218,282 @@ static struct board_info __initdata board_96345gw2 = {
*/
#ifdef CONFIG_BCM63XX_CPU_6348
static struct board_info __initdata board_96348r = {
- .name = "96348R",
- .expected_cpu_id = 0x6348,
+ .name = "96348R",
+ .expected_cpu_id = 0x6348,
- .has_uart0 = 1,
- .has_enet0 = 1,
- .has_pci = 1,
+ .has_pci = 1,
+ .has_uart0 = 1,
+ .has_enet0 = 1,
.enet0 = {
- .has_phy = 1,
- .use_internal_phy = 1,
+ .has_phy = 1,
+ .use_internal_phy = 1,
},
.leds = {
{
- .name = "adsl-fail",
- .gpio = 2,
- .active_low = 1,
+ .name = "adsl-fail",
+ .gpio = 2,
+ .active_low = 1,
},
{
- .name = "ppp",
- .gpio = 3,
- .active_low = 1,
+ .name = "ppp",
+ .gpio = 3,
+ .active_low = 1,
},
{
- .name = "ppp-fail",
- .gpio = 4,
- .active_low = 1,
+ .name = "ppp-fail",
+ .gpio = 4,
+ .active_low = 1,
},
{
- .name = "power",
- .gpio = 0,
- .active_low = 1,
+ .name = "power",
+ .gpio = 0,
+ .active_low = 1,
.default_trigger = "default-on",
},
{
- .name = "stop",
- .gpio = 1,
- .active_low = 1,
+ .name = "stop",
+ .gpio = 1,
+ .active_low = 1,
},
},
};
static struct board_info __initdata board_96348gw_10 = {
- .name = "96348GW-10",
- .expected_cpu_id = 0x6348,
+ .name = "96348GW-10",
+ .expected_cpu_id = 0x6348,
- .has_uart0 = 1,
- .has_enet0 = 1,
- .has_enet1 = 1,
- .has_pci = 1,
+ .has_ohci0 = 1,
+ .has_pccard = 1,
+ .has_pci = 1,
+ .has_uart0 = 1,
+ .has_enet0 = 1,
.enet0 = {
- .has_phy = 1,
- .use_internal_phy = 1,
+ .has_phy = 1,
+ .use_internal_phy = 1,
},
+
+ .has_enet1 = 1,
.enet1 = {
- .force_speed_100 = 1,
- .force_duplex_full = 1,
+ .force_speed_100 = 1,
+ .force_duplex_full = 1,
},
- .has_ohci0 = 1,
- .has_pccard = 1,
- .has_ehci0 = 1,
-
.leds = {
{
- .name = "adsl-fail",
- .gpio = 2,
- .active_low = 1,
+ .name = "adsl-fail",
+ .gpio = 2,
+ .active_low = 1,
},
{
- .name = "ppp",
- .gpio = 3,
- .active_low = 1,
+ .name = "ppp",
+ .gpio = 3,
+ .active_low = 1,
},
{
- .name = "ppp-fail",
- .gpio = 4,
- .active_low = 1,
+ .name = "ppp-fail",
+ .gpio = 4,
+ .active_low = 1,
},
{
- .name = "power",
- .gpio = 0,
- .active_low = 1,
+ .name = "power",
+ .gpio = 0,
+ .active_low = 1,
.default_trigger = "default-on",
},
{
- .name = "stop",
- .gpio = 1,
- .active_low = 1,
+ .name = "stop",
+ .gpio = 1,
+ .active_low = 1,
},
},
};
static struct board_info __initdata board_96348gw_11 = {
- .name = "96348GW-11",
- .expected_cpu_id = 0x6348,
+ .name = "96348GW-11",
+ .expected_cpu_id = 0x6348,
- .has_uart0 = 1,
- .has_enet0 = 1,
- .has_enet1 = 1,
- .has_pci = 1,
+ .has_ohci0 = 1,
+ .has_pccard = 1,
+ .has_pci = 1,
+ .has_uart0 = 1,
+ .has_enet0 = 1,
.enet0 = {
- .has_phy = 1,
- .use_internal_phy = 1,
+ .has_phy = 1,
+ .use_internal_phy = 1,
},
+ .has_enet1 = 1,
.enet1 = {
- .force_speed_100 = 1,
- .force_duplex_full = 1,
+ .force_speed_100 = 1,
+ .force_duplex_full = 1,
},
-
- .has_ohci0 = 1,
- .has_pccard = 1,
- .has_ehci0 = 1,
-
.leds = {
{
- .name = "adsl-fail",
- .gpio = 2,
- .active_low = 1,
+ .name = "adsl-fail",
+ .gpio = 2,
+ .active_low = 1,
},
{
- .name = "ppp",
- .gpio = 3,
- .active_low = 1,
+ .name = "ppp",
+ .gpio = 3,
+ .active_low = 1,
},
{
- .name = "ppp-fail",
- .gpio = 4,
- .active_low = 1,
+ .name = "ppp-fail",
+ .gpio = 4,
+ .active_low = 1,
},
{
- .name = "power",
- .gpio = 0,
- .active_low = 1,
+ .name = "power",
+ .gpio = 0,
+ .active_low = 1,
.default_trigger = "default-on",
},
{
- .name = "stop",
- .gpio = 1,
- .active_low = 1,
+ .name = "stop",
+ .gpio = 1,
+ .active_low = 1,
},
},
};
static struct board_info __initdata board_96348gw = {
- .name = "96348GW",
- .expected_cpu_id = 0x6348,
+ .name = "96348GW",
+ .expected_cpu_id = 0x6348,
- .has_uart0 = 1,
- .has_enet0 = 1,
- .has_enet1 = 1,
- .has_pci = 1,
+ .has_ohci0 = 1,
+ .has_pci = 1,
+ .has_uart0 = 1,
+ .has_enet0 = 1,
.enet0 = {
- .has_phy = 1,
- .use_internal_phy = 1,
+ .has_phy = 1,
+ .use_internal_phy = 1,
},
+
+ .has_enet1 = 1,
.enet1 = {
- .force_speed_100 = 1,
- .force_duplex_full = 1,
+ .force_speed_100 = 1,
+ .force_duplex_full = 1,
},
- .has_ohci0 = 1,
-
.leds = {
{
- .name = "adsl-fail",
- .gpio = 2,
- .active_low = 1,
+ .name = "adsl-fail",
+ .gpio = 2,
+ .active_low = 1,
},
{
- .name = "ppp",
- .gpio = 3,
- .active_low = 1,
+ .name = "ppp",
+ .gpio = 3,
+ .active_low = 1,
},
{
- .name = "ppp-fail",
- .gpio = 4,
- .active_low = 1,
+ .name = "ppp-fail",
+ .gpio = 4,
+ .active_low = 1,
},
{
- .name = "power",
- .gpio = 0,
- .active_low = 1,
+ .name = "power",
+ .gpio = 0,
+ .active_low = 1,
.default_trigger = "default-on",
},
{
- .name = "stop",
- .gpio = 1,
- .active_low = 1,
+ .name = "stop",
+ .gpio = 1,
+ .active_low = 1,
},
},
};
static struct board_info __initdata board_FAST2404 = {
- .name = "F@ST2404",
- .expected_cpu_id = 0x6348,
+ .name = "F@ST2404",
+ .expected_cpu_id = 0x6348,
- .has_uart0 = 1,
- .has_enet0 = 1,
- .has_enet1 = 1,
- .has_pci = 1,
+ .has_ohci0 = 1,
+ .has_pccard = 1,
+ .has_pci = 1,
+ .has_uart0 = 1,
+ .has_enet0 = 1,
.enet0 = {
- .has_phy = 1,
- .use_internal_phy = 1,
+ .has_phy = 1,
+ .use_internal_phy = 1,
},
+ .has_enet1 = 1,
.enet1 = {
- .force_speed_100 = 1,
- .force_duplex_full = 1,
+ .force_speed_100 = 1,
+ .force_duplex_full = 1,
},
-
- .has_ohci0 = 1,
- .has_pccard = 1,
- .has_ehci0 = 1,
};
static struct board_info __initdata board_rta1025w_16 = {
- .name = "RTA1025W_16",
- .expected_cpu_id = 0x6348,
+ .name = "RTA1025W_16",
+ .expected_cpu_id = 0x6348,
- .has_enet0 = 1,
- .has_enet1 = 1,
- .has_pci = 1,
+ .has_pci = 1,
+ .has_enet0 = 1,
.enet0 = {
- .has_phy = 1,
- .use_internal_phy = 1,
+ .has_phy = 1,
+ .use_internal_phy = 1,
},
+
+ .has_enet1 = 1,
.enet1 = {
- .force_speed_100 = 1,
- .force_duplex_full = 1,
+ .force_speed_100 = 1,
+ .force_duplex_full = 1,
},
};
static struct board_info __initdata board_DV201AMR = {
- .name = "DV201AMR",
- .expected_cpu_id = 0x6348,
+ .name = "DV201AMR",
+ .expected_cpu_id = 0x6348,
- .has_uart0 = 1,
- .has_pci = 1,
- .has_ohci0 = 1,
+ .has_ohci0 = 1,
+ .has_pci = 1,
+ .has_uart0 = 1,
- .has_enet0 = 1,
- .has_enet1 = 1,
+ .has_enet0 = 1,
.enet0 = {
- .has_phy = 1,
- .use_internal_phy = 1,
+ .has_phy = 1,
+ .use_internal_phy = 1,
},
+
+ .has_enet1 = 1,
.enet1 = {
- .force_speed_100 = 1,
- .force_duplex_full = 1,
+ .force_speed_100 = 1,
+ .force_duplex_full = 1,
},
};
static struct board_info __initdata board_96348gw_a = {
- .name = "96348GW-A",
- .expected_cpu_id = 0x6348,
+ .name = "96348GW-A",
+ .expected_cpu_id = 0x6348,
- .has_uart0 = 1,
- .has_enet0 = 1,
- .has_enet1 = 1,
- .has_pci = 1,
+ .has_ohci0 = 1,
+ .has_pci = 1,
+ .has_uart0 = 1,
+ .has_enet0 = 1,
.enet0 = {
- .has_phy = 1,
- .use_internal_phy = 1,
+ .has_phy = 1,
+ .use_internal_phy = 1,
},
+
+ .has_enet1 = 1,
.enet1 = {
- .force_speed_100 = 1,
- .force_duplex_full = 1,
+ .force_speed_100 = 1,
+ .force_duplex_full = 1,
},
-
- .has_ohci0 = 1,
};
#endif /* CONFIG_BCM63XX_CPU_6348 */
@@ -511,146 +502,142 @@ static struct board_info __initdata board_96348gw_a = {
*/
#ifdef CONFIG_BCM63XX_CPU_6358
static struct board_info __initdata board_96358vw = {
- .name = "96358VW",
- .expected_cpu_id = 0x6358,
+ .name = "96358VW",
+ .expected_cpu_id = 0x6358,
- .has_uart0 = 1,
- .has_enet0 = 1,
- .has_enet1 = 1,
- .has_pci = 1,
+ .has_ehci0 = 1,
+ .has_ohci0 = 1,
+ .has_pccard = 1,
+ .has_pci = 1,
+ .has_uart0 = 1,
+ .has_enet0 = 1,
.enet0 = {
- .has_phy = 1,
- .use_internal_phy = 1,
+ .has_phy = 1,
+ .use_internal_phy = 1,
},
+ .has_enet1 = 1,
.enet1 = {
- .force_speed_100 = 1,
- .force_duplex_full = 1,
+ .force_speed_100 = 1,
+ .force_duplex_full = 1,
},
- .has_ohci0 = 1,
- .has_pccard = 1,
- .has_ehci0 = 1,
-
.leds = {
{
- .name = "adsl-fail",
- .gpio = 15,
- .active_low = 1,
+ .name = "adsl-fail",
+ .gpio = 15,
+ .active_low = 1,
},
{
- .name = "ppp",
- .gpio = 22,
- .active_low = 1,
+ .name = "ppp",
+ .gpio = 22,
+ .active_low = 1,
},
{
- .name = "ppp-fail",
- .gpio = 23,
- .active_low = 1,
+ .name = "ppp-fail",
+ .gpio = 23,
+ .active_low = 1,
},
{
- .name = "power",
- .gpio = 4,
+ .name = "power",
+ .gpio = 4,
.default_trigger = "default-on",
},
{
- .name = "stop",
- .gpio = 5,
+ .name = "stop",
+ .gpio = 5,
},
},
};
static struct board_info __initdata board_96358vw2 = {
- .name = "96358VW2",
- .expected_cpu_id = 0x6358,
+ .name = "96358VW2",
+ .expected_cpu_id = 0x6358,
- .has_uart0 = 1,
- .has_enet0 = 1,
- .has_enet1 = 1,
- .has_pci = 1,
+ .has_ehci0 = 1,
+ .has_ohci0 = 1,
+ .has_pccard = 1,
+ .has_pci = 1,
+ .has_uart0 = 1,
+ .has_enet0 = 1,
.enet0 = {
- .has_phy = 1,
- .use_internal_phy = 1,
+ .has_phy = 1,
+ .use_internal_phy = 1,
},
+ .has_enet1 = 1,
.enet1 = {
- .force_speed_100 = 1,
- .force_duplex_full = 1,
+ .force_speed_100 = 1,
+ .force_duplex_full = 1,
},
-
- .has_ohci0 = 1,
- .has_pccard = 1,
- .has_ehci0 = 1,
-
.leds = {
{
- .name = "adsl",
- .gpio = 22,
- .active_low = 1,
+ .name = "adsl",
+ .gpio = 22,
+ .active_low = 1,
},
{
- .name = "ppp-fail",
- .gpio = 23,
+ .name = "ppp-fail",
+ .gpio = 23,
},
{
- .name = "power",
- .gpio = 5,
- .active_low = 1,
+ .name = "power",
+ .gpio = 5,
+ .active_low = 1,
.default_trigger = "default-on",
},
{
- .name = "stop",
- .gpio = 4,
- .active_low = 1,
+ .name = "stop",
+ .gpio = 4,
+ .active_low = 1,
},
},
};
static struct board_info __initdata board_AGPFS0 = {
- .name = "AGPF-S0",
- .expected_cpu_id = 0x6358,
+ .name = "AGPF-S0",
+ .expected_cpu_id = 0x6358,
- .has_uart0 = 1,
- .has_enet0 = 1,
- .has_enet1 = 1,
- .has_pci = 1,
+ .has_ehci0 = 1,
+ .has_ohci0 = 1,
+ .has_pci = 1,
+ .has_uart0 = 1,
+ .has_enet0 = 1,
.enet0 = {
- .has_phy = 1,
- .use_internal_phy = 1,
+ .has_phy = 1,
+ .use_internal_phy = 1,
},
+ .has_enet1 = 1,
.enet1 = {
- .force_speed_100 = 1,
- .force_duplex_full = 1,
+ .force_speed_100 = 1,
+ .force_duplex_full = 1,
},
-
- .has_ohci0 = 1,
- .has_ehci0 = 1,
};
static struct board_info __initdata board_DWVS0 = {
- .name = "DWV-S0",
- .expected_cpu_id = 0x6358,
+ .name = "DWV-S0",
+ .expected_cpu_id = 0x6358,
- .has_enet0 = 1,
- .has_enet1 = 1,
- .has_pci = 1,
+ .has_ehci0 = 1,
+ .has_ohci0 = 1,
+ .has_pci = 1,
+ .has_enet0 = 1,
.enet0 = {
- .has_phy = 1,
- .use_internal_phy = 1,
+ .has_phy = 1,
+ .use_internal_phy = 1,
},
+ .has_enet1 = 1,
.enet1 = {
- .force_speed_100 = 1,
- .force_duplex_full = 1,
+ .force_speed_100 = 1,
+ .force_duplex_full = 1,
},
-
- .has_ohci0 = 1,
};
#endif /* CONFIG_BCM63XX_CPU_6358 */
diff --git a/arch/mips/bcm63xx/setup.c b/arch/mips/bcm63xx/setup.c
index e28ee9a7cc7e..d811e3e03f81 100644
--- a/arch/mips/bcm63xx/setup.c
+++ b/arch/mips/bcm63xx/setup.c
@@ -146,7 +146,7 @@ void __init plat_time_init(void)
void __init plat_mem_setup(void)
{
- add_memory_region(0, bcm63xx_get_memory_size(), BOOT_MEM_RAM);
+ memblock_add(0, bcm63xx_get_memory_size());
_machine_halt = bcm63xx_machine_halt;
_machine_restart = __bcm63xx_machine_reboot;
diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile
index 6e56caef69f0..d66511825fe1 100644
--- a/arch/mips/boot/compressed/Makefile
+++ b/arch/mips/boot/compressed/Makefile
@@ -22,7 +22,12 @@ KBUILD_CFLAGS := $(filter-out -pg, $(KBUILD_CFLAGS))
KBUILD_CFLAGS := $(filter-out -fstack-protector, $(KBUILD_CFLAGS))
-KBUILD_CFLAGS := $(KBUILD_CFLAGS) -D__KERNEL__ \
+# Disable lq/sq in zboot
+ifdef CONFIG_CPU_LOONGSON64
+KBUILD_CFLAGS := $(filter-out -march=loongson3a, $(KBUILD_CFLAGS)) -march=mips64r2
+endif
+
+KBUILD_CFLAGS := $(KBUILD_CFLAGS) -D__KERNEL__ -D__DISABLE_EXPORTS \
-DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) -D"VMLINUX_LOAD_ADDRESS_ULL=$(VMLINUX_LOAD_ADDRESS)ull"
KBUILD_AFLAGS := $(KBUILD_AFLAGS) -D__ASSEMBLY__ \
@@ -70,6 +75,7 @@ tool_$(CONFIG_KERNEL_LZ4) = lz4
tool_$(CONFIG_KERNEL_LZMA) = lzma
tool_$(CONFIG_KERNEL_LZO) = lzo
tool_$(CONFIG_KERNEL_XZ) = xzkern
+tool_$(CONFIG_KERNEL_ZSTD) = zstd22
targets += vmlinux.bin.z
$(obj)/vmlinux.bin.z: $(obj)/vmlinux.bin FORCE
diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c
index 88f5d637b1c4..c61c641674e6 100644
--- a/arch/mips/boot/compressed/decompress.c
+++ b/arch/mips/boot/compressed/decompress.c
@@ -72,6 +72,10 @@ void error(char *x)
#include "../../../../lib/decompress_unxz.c"
#endif
+#ifdef CONFIG_KERNEL_ZSTD
+#include "../../../../lib/decompress_unzstd.c"
+#endif
+
const unsigned long __stack_chk_guard = 0x000a0dff;
void __stack_chk_fail(void)
diff --git a/arch/mips/boot/compressed/string.c b/arch/mips/boot/compressed/string.c
index 43beecc3587c..0b593b709228 100644
--- a/arch/mips/boot/compressed/string.c
+++ b/arch/mips/boot/compressed/string.c
@@ -5,6 +5,7 @@
* Very small subset of simple string routines
*/
+#include <linux/compiler_attributes.h>
#include <linux/types.h>
void *memcpy(void *dest, const void *src, size_t n)
@@ -27,3 +28,19 @@ void *memset(void *s, int c, size_t n)
ss[i] = c;
return s;
}
+
+void * __weak memmove(void *dest, const void *src, size_t n)
+{
+ unsigned int i;
+ const char *s = src;
+ char *d = dest;
+
+ if ((uintptr_t)dest < (uintptr_t)src) {
+ for (i = 0; i < n; i++)
+ d[i] = s[i];
+ } else {
+ for (i = n; i > 0; i--)
+ d[i - 1] = s[i - 1];
+ }
+ return dest;
+}
diff --git a/arch/mips/boot/dts/ingenic/jz4725b.dtsi b/arch/mips/boot/dts/ingenic/jz4725b.dtsi
index a8fca560878d..a1f0b71c9223 100644
--- a/arch/mips/boot/dts/ingenic/jz4725b.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4725b.dtsi
@@ -7,6 +7,20 @@
#size-cells = <1>;
compatible = "ingenic,jz4725b";
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu0: cpu@0 {
+ device_type = "cpu";
+ compatible = "ingenic,xburst-mxu1.0";
+ reg = <0>;
+
+ clocks = <&cgu JZ4725B_CLK_CCLK>;
+ clock-names = "cpu";
+ };
+ };
+
cpuintc: interrupt-controller {
#address-cells = <0>;
#interrupt-cells = <1>;
diff --git a/arch/mips/boot/dts/ingenic/jz4740.dtsi b/arch/mips/boot/dts/ingenic/jz4740.dtsi
index 1520585c235c..eee523678ce5 100644
--- a/arch/mips/boot/dts/ingenic/jz4740.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4740.dtsi
@@ -7,6 +7,20 @@
#size-cells = <1>;
compatible = "ingenic,jz4740";
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu0: cpu@0 {
+ device_type = "cpu";
+ compatible = "ingenic,xburst-mxu1.0";
+ reg = <0>;
+
+ clocks = <&cgu JZ4740_CLK_CCLK>;
+ clock-names = "cpu";
+ };
+ };
+
cpuintc: interrupt-controller {
#address-cells = <0>;
#interrupt-cells = <1>;
diff --git a/arch/mips/boot/dts/ingenic/jz4770.dtsi b/arch/mips/boot/dts/ingenic/jz4770.dtsi
index fa11ac950499..018721a9eea9 100644
--- a/arch/mips/boot/dts/ingenic/jz4770.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4770.dtsi
@@ -1,5 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
-
#include <dt-bindings/clock/jz4770-cgu.h>
#include <dt-bindings/clock/ingenic,tcu.h>
@@ -8,6 +7,20 @@
#size-cells = <1>;
compatible = "ingenic,jz4770";
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu0: cpu@0 {
+ device_type = "cpu";
+ compatible = "ingenic,xburst-fpu1.0-mxu1.1";
+ reg = <0>;
+
+ clocks = <&cgu JZ4770_CLK_CCLK>;
+ clock-names = "cpu";
+ };
+ };
+
cpuintc: interrupt-controller {
#address-cells = <0>;
#interrupt-cells = <1>;
diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi
index b7f409a7cf5d..dfb5a7e1bb21 100644
--- a/arch/mips/boot/dts/ingenic/jz4780.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi
@@ -8,6 +8,29 @@
#size-cells = <1>;
compatible = "ingenic,jz4780";
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu0: cpu@0 {
+ device_type = "cpu";
+ compatible = "ingenic,xburst-fpu1.0-mxu1.1";
+ reg = <0>;
+
+ clocks = <&cgu JZ4780_CLK_CPU>;
+ clock-names = "cpu";
+ };
+
+ cpu1: cpu@1 {
+ device_type = "cpu";
+ compatible = "ingenic,xburst-fpu1.0-mxu1.1";
+ reg = <1>;
+
+ clocks = <&cgu JZ4780_CLK_CORE1>;
+ clock-names = "cpu";
+ };
+ };
+
cpuintc: interrupt-controller {
#address-cells = <0>;
#interrupt-cells = <1>;
diff --git a/arch/mips/boot/dts/ingenic/qi_lb60.dts b/arch/mips/boot/dts/ingenic/qi_lb60.dts
index bf298268f1a1..ba0218971572 100644
--- a/arch/mips/boot/dts/ingenic/qi_lb60.dts
+++ b/arch/mips/boot/dts/ingenic/qi_lb60.dts
@@ -109,74 +109,73 @@
debounce-delay-ms = <10>;
wakeup-source;
- row-gpios = <&gpd 18 0 &gpd 19 0 &gpd 20 0 &gpd 21 0
- &gpd 22 0 &gpd 23 0 &gpd 24 0 &gpd 26 0>;
- col-gpios = <&gpc 10 0 &gpc 11 0 &gpc 12 0 &gpc 13 0
- &gpc 14 0 &gpc 15 0 &gpc 16 0 &gpc 17 0>;
+ row-gpios = <&gpd 18 0>, <&gpd 19 0>, <&gpd 20 0>, <&gpd 21 0>,
+ <&gpd 22 0>, <&gpd 23 0>, <&gpd 24 0>, <&gpd 26 0>;
+ col-gpios = <&gpc 10 0>, <&gpc 11 0>, <&gpc 12 0>, <&gpc 13 0>,
+ <&gpc 14 0>, <&gpc 15 0>, <&gpc 16 0>, <&gpc 17 0>;
gpio-activelow;
- linux,keymap = <
- MATRIX_KEY(0, 0, KEY_F1) /* S2 */
- MATRIX_KEY(0, 1, KEY_F2) /* S3 */
- MATRIX_KEY(0, 2, KEY_F3) /* S4 */
- MATRIX_KEY(0, 3, KEY_F4) /* S5 */
- MATRIX_KEY(0, 4, KEY_F5) /* S6 */
- MATRIX_KEY(0, 5, KEY_F6) /* S7 */
- MATRIX_KEY(0, 6, KEY_F7) /* S8 */
-
- MATRIX_KEY(1, 0, KEY_Q) /* S10 */
- MATRIX_KEY(1, 1, KEY_W) /* S11 */
- MATRIX_KEY(1, 2, KEY_E) /* S12 */
- MATRIX_KEY(1, 3, KEY_R) /* S13 */
- MATRIX_KEY(1, 4, KEY_T) /* S14 */
- MATRIX_KEY(1, 5, KEY_Y) /* S15 */
- MATRIX_KEY(1, 6, KEY_U) /* S16 */
- MATRIX_KEY(1, 7, KEY_I) /* S17 */
- MATRIX_KEY(2, 0, KEY_A) /* S18 */
- MATRIX_KEY(2, 1, KEY_S) /* S19 */
- MATRIX_KEY(2, 2, KEY_D) /* S20 */
- MATRIX_KEY(2, 3, KEY_F) /* S21 */
- MATRIX_KEY(2, 4, KEY_G) /* S22 */
- MATRIX_KEY(2, 5, KEY_H) /* S23 */
- MATRIX_KEY(2, 6, KEY_J) /* S24 */
- MATRIX_KEY(2, 7, KEY_K) /* S25 */
- MATRIX_KEY(3, 0, KEY_ESC) /* S26 */
- MATRIX_KEY(3, 1, KEY_Z) /* S27 */
- MATRIX_KEY(3, 2, KEY_X) /* S28 */
- MATRIX_KEY(3, 3, KEY_C) /* S29 */
- MATRIX_KEY(3, 4, KEY_V) /* S30 */
- MATRIX_KEY(3, 5, KEY_B) /* S31 */
- MATRIX_KEY(3, 6, KEY_N) /* S32 */
- MATRIX_KEY(3, 7, KEY_M) /* S33 */
- MATRIX_KEY(4, 0, KEY_TAB) /* S34 */
- MATRIX_KEY(4, 1, KEY_CAPSLOCK) /* S35 */
- MATRIX_KEY(4, 2, KEY_BACKSLASH) /* S36 */
- MATRIX_KEY(4, 3, KEY_APOSTROPHE) /* S37 */
- MATRIX_KEY(4, 4, KEY_COMMA) /* S38 */
- MATRIX_KEY(4, 5, KEY_DOT) /* S39 */
- MATRIX_KEY(4, 6, KEY_SLASH) /* S40 */
- MATRIX_KEY(4, 7, KEY_UP) /* S41 */
- MATRIX_KEY(5, 0, KEY_O) /* S42 */
- MATRIX_KEY(5, 1, KEY_L) /* S43 */
- MATRIX_KEY(5, 2, KEY_EQUAL) /* S44 */
- MATRIX_KEY(5, 3, KEY_QI_UPRED) /* S45 */
- MATRIX_KEY(5, 4, KEY_SPACE) /* S46 */
- MATRIX_KEY(5, 5, KEY_QI_QI) /* S47 */
- MATRIX_KEY(5, 6, KEY_RIGHTCTRL) /* S48 */
- MATRIX_KEY(5, 7, KEY_LEFT) /* S49 */
- MATRIX_KEY(6, 0, KEY_F8) /* S50 */
- MATRIX_KEY(6, 1, KEY_P) /* S51 */
- MATRIX_KEY(6, 2, KEY_BACKSPACE)/* S52 */
- MATRIX_KEY(6, 3, KEY_ENTER) /* S53 */
- MATRIX_KEY(6, 4, KEY_QI_VOLUP) /* S54 */
- MATRIX_KEY(6, 5, KEY_QI_VOLDOWN) /* S55 */
- MATRIX_KEY(6, 6, KEY_DOWN) /* S56 */
- MATRIX_KEY(6, 7, KEY_RIGHT) /* S57 */
-
- MATRIX_KEY(7, 0, KEY_LEFTSHIFT) /* S58 */
- MATRIX_KEY(7, 1, KEY_LEFTALT) /* S59 */
- MATRIX_KEY(7, 2, KEY_QI_FN) /* S60 */
- >;
+ linux,keymap =
+ <MATRIX_KEY(0, 0, KEY_F1)>, /* S2 */
+ <MATRIX_KEY(0, 1, KEY_F2)>, /* S3 */
+ <MATRIX_KEY(0, 2, KEY_F3)>, /* S4 */
+ <MATRIX_KEY(0, 3, KEY_F4)>, /* S5 */
+ <MATRIX_KEY(0, 4, KEY_F5)>, /* S6 */
+ <MATRIX_KEY(0, 5, KEY_F6)>, /* S7 */
+ <MATRIX_KEY(0, 6, KEY_F7)>, /* S8 */
+
+ <MATRIX_KEY(1, 0, KEY_Q)>, /* S10 */
+ <MATRIX_KEY(1, 1, KEY_W)>, /* S11 */
+ <MATRIX_KEY(1, 2, KEY_E)>, /* S12 */
+ <MATRIX_KEY(1, 3, KEY_R)>, /* S13 */
+ <MATRIX_KEY(1, 4, KEY_T)>, /* S14 */
+ <MATRIX_KEY(1, 5, KEY_Y)>, /* S15 */
+ <MATRIX_KEY(1, 6, KEY_U)>, /* S16 */
+ <MATRIX_KEY(1, 7, KEY_I)>, /* S17 */
+ <MATRIX_KEY(2, 0, KEY_A)>, /* S18 */
+ <MATRIX_KEY(2, 1, KEY_S)>, /* S19 */
+ <MATRIX_KEY(2, 2, KEY_D)>, /* S20 */
+ <MATRIX_KEY(2, 3, KEY_F)>, /* S21 */
+ <MATRIX_KEY(2, 4, KEY_G)>, /* S22 */
+ <MATRIX_KEY(2, 5, KEY_H)>, /* S23 */
+ <MATRIX_KEY(2, 6, KEY_J)>, /* S24 */
+ <MATRIX_KEY(2, 7, KEY_K)>, /* S25 */
+ <MATRIX_KEY(3, 0, KEY_ESC)>, /* S26 */
+ <MATRIX_KEY(3, 1, KEY_Z)>, /* S27 */
+ <MATRIX_KEY(3, 2, KEY_X)>, /* S28 */
+ <MATRIX_KEY(3, 3, KEY_C)>, /* S29 */
+ <MATRIX_KEY(3, 4, KEY_V)>, /* S30 */
+ <MATRIX_KEY(3, 5, KEY_B)>, /* S31 */
+ <MATRIX_KEY(3, 6, KEY_N)>, /* S32 */
+ <MATRIX_KEY(3, 7, KEY_M)>, /* S33 */
+ <MATRIX_KEY(4, 0, KEY_TAB)>, /* S34 */
+ <MATRIX_KEY(4, 1, KEY_CAPSLOCK)>, /* S35 */
+ <MATRIX_KEY(4, 2, KEY_BACKSLASH)>, /* S36 */
+ <MATRIX_KEY(4, 3, KEY_APOSTROPHE)>, /* S37 */
+ <MATRIX_KEY(4, 4, KEY_COMMA)>, /* S38 */
+ <MATRIX_KEY(4, 5, KEY_DOT)>, /* S39 */
+ <MATRIX_KEY(4, 6, KEY_SLASH)>, /* S40 */
+ <MATRIX_KEY(4, 7, KEY_UP)>, /* S41 */
+ <MATRIX_KEY(5, 0, KEY_O)>, /* S42 */
+ <MATRIX_KEY(5, 1, KEY_L)>, /* S43 */
+ <MATRIX_KEY(5, 2, KEY_EQUAL)>, /* S44 */
+ <MATRIX_KEY(5, 3, KEY_QI_UPRED)>, /* S45 */
+ <MATRIX_KEY(5, 4, KEY_SPACE)>, /* S46 */
+ <MATRIX_KEY(5, 5, KEY_QI_QI)>, /* S47 */
+ <MATRIX_KEY(5, 6, KEY_RIGHTCTRL)>, /* S48 */
+ <MATRIX_KEY(5, 7, KEY_LEFT)>, /* S49 */
+ <MATRIX_KEY(6, 0, KEY_F8)>, /* S50 */
+ <MATRIX_KEY(6, 1, KEY_P)>, /* S51 */
+ <MATRIX_KEY(6, 2, KEY_BACKSPACE)>,/* S52 */
+ <MATRIX_KEY(6, 3, KEY_ENTER)>, /* S53 */
+ <MATRIX_KEY(6, 4, KEY_QI_VOLUP)>, /* S54 */
+ <MATRIX_KEY(6, 5, KEY_QI_VOLDOWN)>, /* S55 */
+ <MATRIX_KEY(6, 6, KEY_DOWN)>, /* S56 */
+ <MATRIX_KEY(6, 7, KEY_RIGHT)>, /* S57 */
+
+ <MATRIX_KEY(7, 0, KEY_LEFTSHIFT)>, /* S58 */
+ <MATRIX_KEY(7, 1, KEY_LEFTALT)>, /* S59 */
+ <MATRIX_KEY(7, 2, KEY_QI_FN)>; /* S60 */
};
spi {
@@ -261,12 +260,12 @@
#address-cells = <1>;
#size-cells = <0>;
- ingenic,bch-controller = <&ecc>;
+ ecc-engine = <&ecc>;
pinctrl-names = "default";
pinctrl-0 = <&pins_nemc>;
- rb-gpios = <&gpc 30 GPIO_ACTIVE_LOW>;
+ rb-gpios = <&gpc 30 GPIO_ACTIVE_HIGH>;
nand@1 {
reg = <1>;
@@ -324,7 +323,7 @@
pins_nemc: nemc {
function = "nand";
- groups = "nand-cs1";
+ groups = "nand-fre-fwe", "nand-cs1";
};
pins_uart0: uart0 {
diff --git a/arch/mips/boot/dts/ingenic/x1000.dtsi b/arch/mips/boot/dts/ingenic/x1000.dtsi
index 9de9e7c2d523..1f1f896dd1f7 100644
--- a/arch/mips/boot/dts/ingenic/x1000.dtsi
+++ b/arch/mips/boot/dts/ingenic/x1000.dtsi
@@ -8,6 +8,20 @@
#size-cells = <1>;
compatible = "ingenic,x1000", "ingenic,x1000e";
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu0: cpu@0 {
+ device_type = "cpu";
+ compatible = "ingenic,xburst-fpu1.0-mxu1.1";
+ reg = <0>;
+
+ clocks = <&cgu X1000_CLK_CPU>;
+ clock-names = "cpu";
+ };
+ };
+
cpuintc: interrupt-controller {
#address-cells = <0>;
#interrupt-cells = <1>;
diff --git a/arch/mips/boot/dts/ingenic/x1830.dtsi b/arch/mips/boot/dts/ingenic/x1830.dtsi
index eb1214481a33..b05dac3ae308 100644
--- a/arch/mips/boot/dts/ingenic/x1830.dtsi
+++ b/arch/mips/boot/dts/ingenic/x1830.dtsi
@@ -8,6 +8,20 @@
#size-cells = <1>;
compatible = "ingenic,x1830";
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu0: cpu@0 {
+ device_type = "cpu";
+ compatible = "ingenic,xburst-fpu2.0-mxu2.0";
+ reg = <0>;
+
+ clocks = <&cgu X1830_CLK_CPU>;
+ clock-names = "cpu";
+ };
+ };
+
cpuintc: interrupt-controller {
#address-cells = <0>;
#interrupt-cells = <1>;
diff --git a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi
index e574a062dfae..f99a7a11fded 100644
--- a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi
+++ b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi
@@ -19,6 +19,45 @@
#interrupt-cells = <2>;
};
+ ls7a_uart0: serial@10080000 {
+ compatible = "ns16550a";
+ reg = <0 0x10080000 0 0x100>;
+ clock-frequency = <50000000>;
+ interrupt-parent = <&pic>;
+ interrupts = <8 IRQ_TYPE_LEVEL_HIGH>;
+ no-loopback-test;
+ };
+
+ ls7a_uart1: serial@10080100 {
+ status = "disabled";
+ compatible = "ns16550a";
+ reg = <0 0x10080100 0 0x100>;
+ clock-frequency = <50000000>;
+ interrupt-parent = <&pic>;
+ interrupts = <8 IRQ_TYPE_LEVEL_HIGH>;
+ no-loopback-test;
+ };
+
+ ls7a_uart2: serial@10080200 {
+ status = "disabled";
+ compatible = "ns16550a";
+ reg = <0 0x10080200 0 0x100>;
+ clock-frequency = <50000000>;
+ interrupt-parent = <&pic>;
+ interrupts = <8 IRQ_TYPE_LEVEL_HIGH>;
+ no-loopback-test;
+ };
+
+ ls7a_uart3: serial@10080300 {
+ status = "disabled";
+ compatible = "ns16550a";
+ reg = <0 0x10080300 0 0x100>;
+ clock-frequency = <50000000>;
+ interrupt-parent = <&pic>;
+ interrupts = <8 IRQ_TYPE_LEVEL_HIGH>;
+ no-loopback-test;
+ };
+
pci@1a000000 {
compatible = "loongson,ls7a-pci";
device_type = "pci";
diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index 4f34d92b52f9..561389d3fadb 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -16,6 +16,7 @@
#include <linux/export.h>
#include <linux/interrupt.h>
#include <linux/io.h>
+#include <linux/memblock.h>
#include <linux/serial.h>
#include <linux/smp.h>
#include <linux/types.h>
@@ -930,7 +931,7 @@ static __init void memory_exclude_page(u64 addr, u64 *mem, u64 *size)
{
if (addr > *mem && addr < *mem + *size) {
u64 inc = addr - *mem;
- add_memory_region(*mem, inc, BOOT_MEM_RAM);
+ memblock_add(*mem, inc);
*mem += inc;
*size -= inc;
}
@@ -992,19 +993,18 @@ void __init plat_mem_setup(void)
/* Crashkernel ignores bootmem list. It relies on mem=X@Y option */
#ifdef CONFIG_CRASH_DUMP
- add_memory_region(reserve_low_mem, max_memory, BOOT_MEM_RAM);
+ memblock_add(reserve_low_mem, max_memory);
total += max_memory;
#else
#ifdef CONFIG_KEXEC
if (crashk_size > 0) {
- add_memory_region(crashk_base, crashk_size, BOOT_MEM_RAM);
+ memblock_add(crashk_base, crashk_size);
crashk_end = crashk_base + crashk_size;
}
#endif
/*
- * When allocating memory, we want incrementing addresses from
- * bootmem_alloc so the code in add_memory_region can merge
- * regions next to each other.
+ * When allocating memory, we want incrementing addresses,
+ * which is handled by memblock
*/
cvmx_bootmem_lock();
while (total < max_memory) {
@@ -1039,13 +1039,9 @@ void __init plat_mem_setup(void)
*/
if (memory < crashk_base && end > crashk_end) {
/* region is fully in */
- add_memory_region(memory,
- crashk_base - memory,
- BOOT_MEM_RAM);
+ memblock_add(memory, crashk_base - memory);
total += crashk_base - memory;
- add_memory_region(crashk_end,
- end - crashk_end,
- BOOT_MEM_RAM);
+ memblock_add(crashk_end, end - crashk_end);
total += end - crashk_end;
continue;
}
@@ -1073,7 +1069,7 @@ void __init plat_mem_setup(void)
*/
mem_alloc_size -= end - crashk_base;
#endif
- add_memory_region(memory, mem_alloc_size, BOOT_MEM_RAM);
+ memblock_add(memory, mem_alloc_size);
total += mem_alloc_size;
/* Recovering mem_alloc_size */
mem_alloc_size = 4 << 20;
@@ -1088,7 +1084,7 @@ void __init plat_mem_setup(void)
/* Adjust for physical offset. */
kernel_start &= ~0xffffffff80000000ULL;
- add_memory_region(kernel_start, kernel_size, BOOT_MEM_RAM);
+ memblock_add(kernel_start, kernel_size);
#endif /* CONFIG_CRASH_DUMP */
#ifdef CONFIG_CAVIUM_RESERVE32
@@ -1126,7 +1122,7 @@ EXPORT_SYMBOL(prom_putchar);
void __init prom_free_prom_memory(void)
{
- if (CAVIUM_OCTEON_DCACHE_PREFETCH_WAR) {
+ if (OCTEON_IS_MODEL(OCTEON_CN6XXX)) {
/* Check for presence of Core-14449 fix. */
u32 insn;
u32 *foo;
diff --git a/arch/mips/cobalt/setup.c b/arch/mips/cobalt/setup.c
index c136a18c7221..46581e686882 100644
--- a/arch/mips/cobalt/setup.c
+++ b/arch/mips/cobalt/setup.c
@@ -13,6 +13,7 @@
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/ioport.h>
+#include <linux/memblock.h>
#include <linux/pm.h>
#include <asm/bootinfo.h>
@@ -112,7 +113,7 @@ void __init prom_init(void)
strlcat(arcs_cmdline, " ", COMMAND_LINE_SIZE);
}
- add_memory_region(0x0, memsz, BOOT_MEM_RAM);
+ memblock_add(0, memsz);
setup_8250_early_printk_port(CKSEG1ADDR(0x1c800000), 0, 0);
}
diff --git a/arch/mips/configs/ci20_defconfig b/arch/mips/configs/ci20_defconfig
index 0a46199fdc3f..052c5ad0f2b1 100644
--- a/arch/mips/configs/ci20_defconfig
+++ b/arch/mips/configs/ci20_defconfig
@@ -22,7 +22,7 @@ CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLAB=y
-CONFIG_MACH_INGENIC=y
+CONFIG_MACH_INGENIC_SOC=y
CONFIG_JZ4780_CI20=y
CONFIG_HIGHMEM=y
CONFIG_HZ_100=y
@@ -42,7 +42,7 @@ CONFIG_IP_PNP_DHCP=y
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
CONFIG_DEVTMPFS=y
-# CONFIG_FW_LOADER is not set
+CONFIG_FW_LOADER=m
# CONFIG_ALLOW_DEV_COREDUMP is not set
CONFIG_MTD=y
CONFIG_MTD_RAW_NAND=y
diff --git a/arch/mips/configs/cu1000-neo_defconfig b/arch/mips/configs/cu1000-neo_defconfig
index e924c817f73d..55d0690a3ffe 100644
--- a/arch/mips/configs/cu1000-neo_defconfig
+++ b/arch/mips/configs/cu1000-neo_defconfig
@@ -1,5 +1,3 @@
-CONFIG_LOCALVERSION_AUTO=y
-CONFIG_KERNEL_GZIP=y
CONFIG_SYSVIPC=y
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
@@ -9,7 +7,6 @@ CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_CGROUPS=y
CONFIG_MEMCG=y
-CONFIG_MEMCG_KMEM=y
CONFIG_CGROUP_SCHED=y
CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_DEVICE=y
@@ -22,7 +19,7 @@ CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLAB=y
-CONFIG_MACH_INGENIC=y
+CONFIG_MACH_INGENIC_SOC=y
CONFIG_X1000_CU1000_NEO=y
CONFIG_HIGHMEM=y
CONFIG_HZ_100=y
@@ -31,7 +28,6 @@ CONFIG_HZ_100=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
# CONFIG_COMPACTION is not set
CONFIG_CMA=y
-CONFIG_CMA_AREAS=7
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -40,19 +36,16 @@ CONFIG_CFG80211=y
CONFIG_UEVENT_HELPER=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
-# CONFIG_FW_LOADER is not set
# CONFIG_ALLOW_DEV_COREDUMP is not set
CONFIG_NETDEVICES=y
CONFIG_STMMAC_ETH=y
CONFIG_SMSC_PHY=y
CONFIG_BRCMFMAC=y
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
CONFIG_VT_HW_CONSOLE_BINDING=y
CONFIG_LEGACY_PTY_COUNT=2
-CONFIG_SERIAL_EARLYCON=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_NR_UARTS=3
@@ -66,8 +59,6 @@ CONFIG_GPIO_SYSFS=y
CONFIG_SENSORS_ADS7828=y
CONFIG_WATCHDOG=y
CONFIG_JZ4740_WDT=y
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
# CONFIG_VGA_CONSOLE is not set
# CONFIG_HID is not set
# CONFIG_USB_SUPPORT is not set
@@ -82,8 +73,6 @@ CONFIG_RTC_DRV_JZ4740=y
CONFIG_DMADEVICES=y
CONFIG_DMA_JZ4780=y
# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_NVMEM=y
-CONFIG_NVMEM_SYSFS=y
CONFIG_EXT4_FS=y
# CONFIG_DNOTIFY is not set
CONFIG_AUTOFS_FS=y
@@ -108,8 +97,8 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=15
CONFIG_MESSAGE_LOGLEVEL_DEFAULT=7
CONFIG_DEBUG_INFO=y
CONFIG_STRIP_ASM_SYMS=y
-CONFIG_DEBUG_FS=y
CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_FS=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PANIC_TIMEOUT=10
# CONFIG_SCHED_DEBUG is not set
diff --git a/arch/mips/configs/cu1830-neo_defconfig b/arch/mips/configs/cu1830-neo_defconfig
index cbfb62900273..e7064851a47a 100644
--- a/arch/mips/configs/cu1830-neo_defconfig
+++ b/arch/mips/configs/cu1830-neo_defconfig
@@ -1,5 +1,3 @@
-CONFIG_LOCALVERSION_AUTO=y
-CONFIG_KERNEL_GZIP=y
CONFIG_SYSVIPC=y
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
@@ -9,7 +7,6 @@ CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_CGROUPS=y
CONFIG_MEMCG=y
-CONFIG_MEMCG_KMEM=y
CONFIG_CGROUP_SCHED=y
CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_DEVICE=y
@@ -22,7 +19,7 @@ CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLAB=y
-CONFIG_MACH_INGENIC=y
+CONFIG_MACH_INGENIC_SOC=y
CONFIG_X1830_CU1830_NEO=y
CONFIG_HIGHMEM=y
CONFIG_HZ_100=y
@@ -31,7 +28,6 @@ CONFIG_HZ_100=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
# CONFIG_COMPACTION is not set
CONFIG_CMA=y
-CONFIG_CMA_AREAS=7
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -40,7 +36,6 @@ CONFIG_CFG80211=y
CONFIG_UEVENT_HELPER=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
-# CONFIG_FW_LOADER is not set
# CONFIG_ALLOW_DEV_COREDUMP is not set
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
@@ -49,13 +44,11 @@ CONFIG_NETDEVICES=y
CONFIG_STMMAC_ETH=y
CONFIG_ICPLUS_PHY=y
CONFIG_BRCMFMAC=y
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
CONFIG_VT_HW_CONSOLE_BINDING=y
CONFIG_LEGACY_PTY_COUNT=2
-CONFIG_SERIAL_EARLYCON=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_NR_UARTS=2
@@ -69,8 +62,6 @@ CONFIG_GPIO_SYSFS=y
CONFIG_SENSORS_ADS7828=y
CONFIG_WATCHDOG=y
CONFIG_JZ4740_WDT=y
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
# CONFIG_VGA_CONSOLE is not set
# CONFIG_HID is not set
# CONFIG_USB_SUPPORT is not set
@@ -85,8 +76,6 @@ CONFIG_RTC_DRV_JZ4740=y
CONFIG_DMADEVICES=y
CONFIG_DMA_JZ4780=y
# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_NVMEM=y
-CONFIG_NVMEM_SYSFS=y
CONFIG_EXT4_FS=y
# CONFIG_DNOTIFY is not set
CONFIG_AUTOFS_FS=y
@@ -111,8 +100,8 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=15
CONFIG_MESSAGE_LOGLEVEL_DEFAULT=7
CONFIG_DEBUG_INFO=y
CONFIG_STRIP_ASM_SYMS=y
-CONFIG_DEBUG_FS=y
CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_FS=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PANIC_TIMEOUT=10
# CONFIG_SCHED_DEBUG is not set
diff --git a/arch/mips/configs/gcw0_defconfig b/arch/mips/configs/gcw0_defconfig
index 4994749b9eaa..7e28a4fe9d84 100644
--- a/arch/mips/configs/gcw0_defconfig
+++ b/arch/mips/configs/gcw0_defconfig
@@ -4,7 +4,7 @@ CONFIG_HIGH_RES_TIMERS=y
CONFIG_PREEMPT_VOLUNTARY=y
CONFIG_EMBEDDED=y
CONFIG_PROFILING=y
-CONFIG_MACH_INGENIC=y
+CONFIG_MACH_INGENIC_SOC=y
CONFIG_JZ4770_GCW0=y
CONFIG_HIGHMEM=y
# CONFIG_SECCOMP is not set
diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig
index a65b08de4098..38a817ead8e7 100644
--- a/arch/mips/configs/loongson3_defconfig
+++ b/arch/mips/configs/loongson3_defconfig
@@ -30,7 +30,6 @@ CONFIG_EMBEDDED=y
CONFIG_PERF_EVENTS=y
CONFIG_MACH_LOONGSON64=y
CONFIG_CPU_HAS_MSA=y
-CONFIG_SMP=y
CONFIG_NR_CPUS=16
CONFIG_HZ_256=y
CONFIG_KEXEC=y
@@ -403,7 +402,6 @@ CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_DEFLATE=m
CONFIG_PRINTK_TIME=y
-CONFIG_FRAME_WARN=1024
CONFIG_STRIP_ASM_SYMS=y
CONFIG_MAGIC_SYSRQ=y
# CONFIG_SCHED_DEBUG is not set
diff --git a/arch/mips/configs/pnx8335_stb225_defconfig b/arch/mips/configs/pnx8335_stb225_defconfig
deleted file mode 100644
index d06db6b87959..000000000000
--- a/arch/mips/configs/pnx8335_stb225_defconfig
+++ /dev/null
@@ -1,77 +0,0 @@
-# CONFIG_LOCALVERSION_AUTO is not set
-# CONFIG_SWAP is not set
-CONFIG_SYSVIPC=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_PREEMPT_VOLUNTARY=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_EXPERT=y
-CONFIG_SLAB=y
-CONFIG_NXP_STB225=y
-CONFIG_CPU_LITTLE_ENDIAN=y
-CONFIG_HZ_128=y
-# CONFIG_SECCOMP is not set
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_INET_AH=y
-# CONFIG_IPV6 is not set
-CONFIG_MTD=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_CFI_ADV_OPTIONS=y
-CONFIG_MTD_CFI_LE_BYTE_SWAP=y
-CONFIG_MTD_CFI_GEOMETRY=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_PHYSMAP=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_SD=y
-# CONFIG_SCSI_LOWLEVEL is not set
-CONFIG_ATA=y
-CONFIG_NETDEVICES=y
-CONFIG_INPUT_EVDEV=m
-CONFIG_INPUT_EVBUG=m
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_VT_CONSOLE is not set
-# CONFIG_LEGACY_PTYS is not set
-CONFIG_SERIAL_PNX8XXX=y
-CONFIG_SERIAL_PNX8XXX_CONSOLE=y
-CONFIG_HW_RANDOM=y
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-# CONFIG_HWMON is not set
-CONFIG_FB=y
-# CONFIG_VGA_CONSOLE is not set
-CONFIG_SOUND=m
-CONFIG_SND=m
-CONFIG_SND_VERBOSE_PRINTK=y
-CONFIG_SND_DEBUG=y
-CONFIG_SND_SEQUENCER=m
-CONFIG_EXT2_FS=m
-# CONFIG_DNOTIFY is not set
-CONFIG_MSDOS_FS=m
-CONFIG_VFAT_FS=m
-CONFIG_TMPFS=y
-CONFIG_JFFS2_FS=y
-CONFIG_CRAMFS=y
-CONFIG_NFS_FS=y
-CONFIG_ROOT_NFS=y
-CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
-CONFIG_NLS=y
-CONFIG_NLS_CODEPAGE_437=m
-CONFIG_NLS_CODEPAGE_850=m
-CONFIG_NLS_CODEPAGE_932=m
-CONFIG_NLS_ASCII=m
-CONFIG_NLS_ISO8859_1=m
-CONFIG_NLS_ISO8859_15=m
-CONFIG_NLS_UTF8=m
diff --git a/arch/mips/configs/qi_lb60_defconfig b/arch/mips/configs/qi_lb60_defconfig
index 81bfbee72b0c..b4448d0876d5 100644
--- a/arch/mips/configs/qi_lb60_defconfig
+++ b/arch/mips/configs/qi_lb60_defconfig
@@ -7,7 +7,8 @@ CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLAB=y
-CONFIG_MACH_INGENIC=y
+CONFIG_MACH_INGENIC_SOC=y
+CONFIG_JZ4740_QI_LB60=y
CONFIG_HZ_100=y
# CONFIG_SECCOMP is not set
CONFIG_MODULES=y
@@ -72,9 +73,7 @@ CONFIG_DRM=y
CONFIG_DRM_FBDEV_OVERALLOC=200
CONFIG_DRM_PANEL_SIMPLE=y
CONFIG_DRM_INGENIC=y
-# CONFIG_LCD_CLASS_DEVICE is not set
CONFIG_BACKLIGHT_CLASS_DEVICE=y
-# CONFIG_BACKLIGHT_GENERIC is not set
# CONFIG_VGA_CONSOLE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
@@ -170,9 +169,9 @@ CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_INFO=y
CONFIG_STRIP_ASM_SYMS=y
CONFIG_READABLE_ASM=y
+CONFIG_KGDB=y
CONFIG_DEBUG_KMEMLEAK=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_PANIC_ON_OOPS=y
# CONFIG_FTRACE is not set
-CONFIG_KGDB=y
diff --git a/arch/mips/configs/rs90_defconfig b/arch/mips/configs/rs90_defconfig
index de6752051ecc..dfbb9fed9a42 100644
--- a/arch/mips/configs/rs90_defconfig
+++ b/arch/mips/configs/rs90_defconfig
@@ -19,7 +19,7 @@ CONFIG_EMBEDDED=y
# CONFIG_PERF_EVENTS is not set
CONFIG_SLAB=y
CONFIG_PROFILING=y
-CONFIG_MACH_INGENIC=y
+CONFIG_MACH_INGENIC_SOC=y
CONFIG_JZ4740_RS90=y
CONFIG_PAGE_SIZE_16KB=y
CONFIG_HZ_100=y
@@ -80,8 +80,8 @@ CONFIG_KEYBOARD_GPIO=y
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
CONFIG_LEGACY_PTY_COUNT=2
-# CONFIG_DEVMEM is not set
# CONFIG_HW_RANDOM is not set
+# CONFIG_DEVMEM is not set
# CONFIG_I2C_COMPAT is not set
# CONFIG_I2C_HELPER_AUTO is not set
CONFIG_POWER_SUPPLY=y
diff --git a/arch/mips/dec/prom/memory.c b/arch/mips/dec/prom/memory.c
index 5073d2ed78bb..44490c30d63b 100644
--- a/arch/mips/dec/prom/memory.c
+++ b/arch/mips/dec/prom/memory.c
@@ -12,7 +12,6 @@
#include <linux/types.h>
#include <asm/addrspace.h>
-#include <asm/bootinfo.h>
#include <asm/dec/machtype.h>
#include <asm/dec/prom.h>
#include <asm/page.h>
@@ -28,7 +27,7 @@ volatile unsigned long mem_err; /* So we know an error occurred */
#define CHUNK_SIZE 0x400000
-static inline void pmax_setup_memory_region(void)
+static __init void pmax_setup_memory_region(void)
{
volatile unsigned char *memory_page, dummy;
char old_handler[0x80];
@@ -50,15 +49,14 @@ static inline void pmax_setup_memory_region(void)
}
memcpy((void *)(CKSEG0 + 0x80), &old_handler, 0x80);
- add_memory_region(0, (unsigned long)memory_page - CKSEG1 - CHUNK_SIZE,
- BOOT_MEM_RAM);
+ memblock_add(0, (unsigned long)memory_page - CKSEG1 - CHUNK_SIZE);
}
/*
* Use the REX prom calls to get hold of the memory bitmap, and thence
* determine memory size.
*/
-static inline void rex_setup_memory_region(void)
+static __init void rex_setup_memory_region(void)
{
int i, bitmap_size;
unsigned long mem_start = 0, mem_size = 0;
@@ -76,13 +74,13 @@ static inline void rex_setup_memory_region(void)
else if (!mem_size)
mem_start += (8 * bm->pagesize);
else {
- add_memory_region(mem_start, mem_size, BOOT_MEM_RAM);
+ memblock_add(mem_start, mem_size);
mem_start += mem_size + (8 * bm->pagesize);
mem_size = 0;
}
}
if (mem_size)
- add_memory_region(mem_start, mem_size, BOOT_MEM_RAM);
+ memblock_add(mem_start, mem_size);
}
void __init prom_meminit(u32 magic)
diff --git a/arch/mips/dec/setup.c b/arch/mips/dec/setup.c
index d4e868b828e5..eaad0ed4b523 100644
--- a/arch/mips/dec/setup.c
+++ b/arch/mips/dec/setup.c
@@ -6,7 +6,7 @@
* for more details.
*
* Copyright (C) 1998 Harald Koerfgen
- * Copyright (C) 2000, 2001, 2002, 2003, 2005 Maciej W. Rozycki
+ * Copyright (C) 2000, 2001, 2002, 2003, 2005, 2020 Maciej W. Rozycki
*/
#include <linux/console.h>
#include <linux/export.h>
@@ -15,6 +15,7 @@
#include <linux/ioport.h>
#include <linux/irq.h>
#include <linux/irqnr.h>
+#include <linux/memblock.h>
#include <linux/param.h>
#include <linux/percpu-defs.h>
#include <linux/sched.h>
@@ -22,6 +23,7 @@
#include <linux/types.h>
#include <linux/pm.h>
+#include <asm/addrspace.h>
#include <asm/bootinfo.h>
#include <asm/cpu.h>
#include <asm/cpu-features.h>
@@ -29,7 +31,9 @@
#include <asm/irq.h>
#include <asm/irq_cpu.h>
#include <asm/mipsregs.h>
+#include <asm/page.h>
#include <asm/reboot.h>
+#include <asm/sections.h>
#include <asm/time.h>
#include <asm/traps.h>
#include <asm/wbflush.h>
@@ -146,6 +150,9 @@ void __init plat_mem_setup(void)
ioport_resource.start = ~0UL;
ioport_resource.end = 0UL;
+
+ /* Stay away from the firmware working memory area for now. */
+ memblock_reserve(PHYS_OFFSET, __pa_symbol(&_text) - PHYS_OFFSET);
}
/*
diff --git a/arch/mips/fw/arc/memory.c b/arch/mips/fw/arc/memory.c
index da0712ad85f5..37625ae5e35d 100644
--- a/arch/mips/fw/arc/memory.c
+++ b/arch/mips/fw/arc/memory.c
@@ -68,20 +68,24 @@ static char *arc_mtypes[8] = {
: arc_mtypes[a.arc]
#endif
+enum {
+ mem_free, mem_prom_used, mem_reserved
+};
+
static inline int memtype_classify_arcs(union linux_memtypes type)
{
switch (type.arcs) {
case arcs_fcontig:
case arcs_free:
- return BOOT_MEM_RAM;
+ return mem_free;
case arcs_atmp:
- return BOOT_MEM_ROM_DATA;
+ return mem_prom_used;
case arcs_eblock:
case arcs_rvpage:
case arcs_bmem:
case arcs_prog:
case arcs_aperm:
- return BOOT_MEM_RESERVED;
+ return mem_reserved;
default:
BUG();
}
@@ -93,15 +97,15 @@ static inline int memtype_classify_arc(union linux_memtypes type)
switch (type.arc) {
case arc_free:
case arc_fcontig:
- return BOOT_MEM_RAM;
+ return mem_free;
case arc_atmp:
- return BOOT_MEM_ROM_DATA;
+ return mem_prom_used;
case arc_eblock:
case arc_rvpage:
case arc_bmem:
case arc_prog:
case arc_aperm:
- return BOOT_MEM_RESERVED;
+ return mem_reserved;
default:
BUG();
}
@@ -143,9 +147,17 @@ void __weak __init prom_meminit(void)
size = p->pages << ARC_PAGE_SHIFT;
type = prom_memtype_classify(p->type);
- add_memory_region(base, size, type);
+ /* ignore mirrored RAM on IP28/IP30 */
+ if (base < PHYS_OFFSET)
+ continue;
+
+ memblock_add(base, size);
+
+ if (type == mem_reserved)
+ memblock_reserve(base, size);
- if (type == BOOT_MEM_ROM_DATA) {
+ if (type == mem_prom_used) {
+ memblock_reserve(base, size);
if (nr_prom_mem >= 5) {
pr_err("Too many ROM DATA regions");
continue;
diff --git a/arch/mips/fw/sni/sniprom.c b/arch/mips/fw/sni/sniprom.c
index 80112f2298b6..8f6730376a42 100644
--- a/arch/mips/fw/sni/sniprom.c
+++ b/arch/mips/fw/sni/sniprom.c
@@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/memblock.h>
#include <linux/string.h>
#include <linux/console.h>
@@ -131,8 +132,7 @@ static void __init sni_mem_init(void)
}
pr_debug("Bank%d: %08x @ %08x\n", i,
memconf[i].size, memconf[i].base);
- add_memory_region(memconf[i].base, memconf[i].size,
- BOOT_MEM_RAM);
+ memblock_add(memconf[i].base, memconf[i].size);
}
}
diff --git a/arch/mips/generic/Kconfig b/arch/mips/generic/Kconfig
index fd6019802657..55d9aed7ced9 100644
--- a/arch/mips/generic/Kconfig
+++ b/arch/mips/generic/Kconfig
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-if MIPS_GENERIC
+if MIPS_GENERIC_KERNEL
config LEGACY_BOARDS
bool
@@ -73,6 +73,12 @@ config FIT_IMAGE_FDT_OCELOT
from Microsemi in the FIT kernel image.
This requires u-boot on the platform.
+config BOARD_INGENIC
+ bool "Support boards based on Ingenic SoCs"
+ select MACH_INGENIC_GENERIC
+ help
+ Enable support for boards based on Ingenic SoCs.
+
config VIRT_BOARD_RANCHU
bool "Support Ranchu platform for Android emulator"
help
diff --git a/arch/mips/generic/Makefile b/arch/mips/generic/Makefile
index 2384a6b09e4c..e37a59bae0a6 100644
--- a/arch/mips/generic/Makefile
+++ b/arch/mips/generic/Makefile
@@ -11,4 +11,5 @@ obj-y += proc.o
obj-$(CONFIG_YAMON_DT_SHIM) += yamon-dt.o
obj-$(CONFIG_LEGACY_BOARD_SEAD3) += board-sead3.o
obj-$(CONFIG_LEGACY_BOARD_OCELOT) += board-ocelot.o
+obj-$(CONFIG_MACH_INGENIC) += board-ingenic.o
obj-$(CONFIG_VIRT_BOARD_RANCHU) += board-ranchu.o
diff --git a/arch/mips/generic/Platform b/arch/mips/generic/Platform
index 53c33cb72974..f8ef2f9d107e 100644
--- a/arch/mips/generic/Platform
+++ b/arch/mips/generic/Platform
@@ -8,8 +8,12 @@
# option) any later version.
#
+# Note: order matters, keep the asm/mach-generic include last.
+cflags-$(CONFIG_MACH_INGENIC_SOC) += -I$(srctree)/arch/mips/include/asm/mach-ingenic
cflags-$(CONFIG_MIPS_GENERIC) += -I$(srctree)/arch/mips/include/asm/mach-generic
+
load-$(CONFIG_MIPS_GENERIC) += 0xffffffff80100000
+zload-$(CONFIG_MIPS_GENERIC) += 0xffffffff81000000
all-$(CONFIG_MIPS_GENERIC) := vmlinux.gz.itb
its-y := vmlinux.its.S
diff --git a/arch/mips/generic/board-ingenic.c b/arch/mips/generic/board-ingenic.c
new file mode 100644
index 000000000000..0cec0bea13d6
--- /dev/null
+++ b/arch/mips/generic/board-ingenic.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Support for Ingenic SoCs
+ *
+ * Copyright (C) 2009-2010, Lars-Peter Clausen <lars@metafoo.de>
+ * Copyright (C) 2011, Maarten ter Huurne <maarten@treewalker.org>
+ * Copyright (C) 2020 Paul Cercueil <paul@crapouillou.net>
+ */
+
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
+#include <linux/pm.h>
+#include <linux/sizes.h>
+#include <linux/suspend.h>
+#include <linux/types.h>
+
+#include <asm/bootinfo.h>
+#include <asm/machine.h>
+#include <asm/reboot.h>
+
+static __init char *ingenic_get_system_type(unsigned long machtype)
+{
+ switch (machtype) {
+ case MACH_INGENIC_X2000E:
+ return "X2000E";
+ case MACH_INGENIC_X2000:
+ return "X2000";
+ case MACH_INGENIC_X1830:
+ return "X1830";
+ case MACH_INGENIC_X1000E:
+ return "X1000E";
+ case MACH_INGENIC_X1000:
+ return "X1000";
+ case MACH_INGENIC_JZ4780:
+ return "JZ4780";
+ case MACH_INGENIC_JZ4775:
+ return "JZ4775";
+ case MACH_INGENIC_JZ4770:
+ return "JZ4770";
+ case MACH_INGENIC_JZ4725B:
+ return "JZ4725B";
+ default:
+ return "JZ4740";
+ }
+}
+
+static __init const void *ingenic_fixup_fdt(const void *fdt, const void *match_data)
+{
+ /*
+ * Old devicetree files for the qi,lb60 board did not have a /memory
+ * node. Hardcode the memory info here.
+ */
+ if (!fdt_node_check_compatible(fdt, 0, "qi,lb60") &&
+ fdt_path_offset(fdt, "/memory") < 0)
+ early_init_dt_add_memory_arch(0, SZ_32M);
+
+ mips_machtype = (unsigned long)match_data;
+ system_type = ingenic_get_system_type(mips_machtype);
+
+ return fdt;
+}
+
+static const struct of_device_id ingenic_of_match[] __initconst = {
+ { .compatible = "ingenic,jz4740", .data = (void *)MACH_INGENIC_JZ4740 },
+ { .compatible = "ingenic,jz4725b", .data = (void *)MACH_INGENIC_JZ4725B },
+ { .compatible = "ingenic,jz4770", .data = (void *)MACH_INGENIC_JZ4770 },
+ { .compatible = "ingenic,jz4775", .data = (void *)MACH_INGENIC_JZ4775 },
+ { .compatible = "ingenic,jz4780", .data = (void *)MACH_INGENIC_JZ4780 },
+ { .compatible = "ingenic,x1000", .data = (void *)MACH_INGENIC_X1000 },
+ { .compatible = "ingenic,x1000e", .data = (void *)MACH_INGENIC_X1000E },
+ { .compatible = "ingenic,x1830", .data = (void *)MACH_INGENIC_X1830 },
+ { .compatible = "ingenic,x2000", .data = (void *)MACH_INGENIC_X2000 },
+ { .compatible = "ingenic,x2000e", .data = (void *)MACH_INGENIC_X2000E },
+ {}
+};
+
+MIPS_MACHINE(ingenic) = {
+ .matches = ingenic_of_match,
+ .fixup_fdt = ingenic_fixup_fdt,
+};
+
+static void ingenic_wait_instr(void)
+{
+ __asm__(".set push;\n"
+ ".set mips3;\n"
+ "wait;\n"
+ ".set pop;\n"
+ );
+}
+
+static void ingenic_halt(void)
+{
+ for (;;)
+ ingenic_wait_instr();
+}
+
+static int __maybe_unused ingenic_pm_enter(suspend_state_t state)
+{
+ ingenic_wait_instr();
+
+ return 0;
+}
+
+static const struct platform_suspend_ops ingenic_pm_ops __maybe_unused = {
+ .valid = suspend_valid_only_mem,
+ .enter = ingenic_pm_enter,
+};
+
+static int __init ingenic_pm_init(void)
+{
+ if (boot_cpu_type() == CPU_XBURST) {
+ if (IS_ENABLED(CONFIG_PM_SLEEP))
+ suspend_set_ops(&ingenic_pm_ops);
+ _machine_halt = ingenic_halt;
+ }
+
+ return 0;
+
+}
+late_initcall(ingenic_pm_init);
diff --git a/arch/mips/generic/init.c b/arch/mips/generic/init.c
index 805d0135a9f4..66a19337d2ab 100644
--- a/arch/mips/generic/init.c
+++ b/arch/mips/generic/init.c
@@ -39,12 +39,11 @@ void __init *plat_get_fdt(void)
/* Already set up */
return (void *)fdt;
- if ((fw_arg0 == -2) && !fdt_check_header((void *)fw_passed_dtb)) {
+ if (fw_passed_dtb && !fdt_check_header((void *)fw_passed_dtb)) {
/*
- * We booted using the UHI boot protocol, so we have been
- * provided with the appropriate device tree for the board.
- * Make use of it & search for any machine struct based upon
- * the root compatible string.
+ * We have been provided with the appropriate device tree for
+ * the board. Make use of it & search for any machine struct
+ * based upon the root compatible string.
*/
fdt = (void *)fw_passed_dtb;
@@ -106,7 +105,7 @@ void __init plat_mem_setup(void)
if (mach && mach->fixup_fdt)
fdt = mach->fixup_fdt(fdt, mach_match_data);
- strlcpy(arcs_cmdline, boot_command_line, COMMAND_LINE_SIZE);
+ fw_init_cmdline();
__dt_setup_arch((void *)fdt);
}
diff --git a/arch/mips/generic/proc.c b/arch/mips/generic/proc.c
index 4c992809cc3f..cce2fde219a3 100644
--- a/arch/mips/generic/proc.c
+++ b/arch/mips/generic/proc.c
@@ -8,11 +8,16 @@
#include <asm/bootinfo.h>
+char *system_type;
+
const char *get_system_type(void)
{
const char *str;
int err;
+ if (system_type)
+ return system_type;
+
err = of_property_read_string(of_root, "model", &str);
if (!err)
return str;
diff --git a/arch/mips/include/asm/bootinfo.h b/arch/mips/include/asm/bootinfo.h
index 147c9327ce04..aa03b1237155 100644
--- a/arch/mips/include/asm/bootinfo.h
+++ b/arch/mips/include/asm/bootinfo.h
@@ -79,8 +79,10 @@ enum ingenic_machine_type {
MACH_INGENIC_JZ4775,
MACH_INGENIC_JZ4780,
MACH_INGENIC_X1000,
+ MACH_INGENIC_X1000E,
MACH_INGENIC_X1830,
MACH_INGENIC_X2000,
+ MACH_INGENIC_X2000E,
};
extern char *system_type;
@@ -88,13 +90,6 @@ const char *get_system_type(void);
extern unsigned long mips_machtype;
-#define BOOT_MEM_RAM 1
-#define BOOT_MEM_ROM_DATA 2
-#define BOOT_MEM_RESERVED 3
-#define BOOT_MEM_INIT_RAM 4
-#define BOOT_MEM_NOMAP 5
-
-extern void add_memory_region(phys_addr_t start, phys_addr_t size, long type);
extern void detect_memory_region(phys_addr_t start, phys_addr_t sz_min, phys_addr_t sz_max);
extern void prom_init(void);
diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index 78cf7e300f12..f2e216eef7da 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -171,9 +171,6 @@
#ifndef cpu_has_llsc
#define cpu_has_llsc __isa_ge_or_opt(1, MIPS_CPU_LLSC)
#endif
-#ifndef cpu_has_bp_ghist
-#define cpu_has_bp_ghist __opt(MIPS_CPU_BP_GHIST)
-#endif
#ifndef kernel_uses_llsc
#define kernel_uses_llsc cpu_has_llsc
#endif
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index 388a82f28a87..c9222cc2244f 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -398,7 +398,6 @@ enum cpu_type_enum {
#define MIPS_CPU_RW_LLB BIT_ULL(32) /* LLADDR/LLB writes are allowed */
#define MIPS_CPU_LPA BIT_ULL(33) /* CPU supports Large Physical Addressing */
#define MIPS_CPU_CDMM BIT_ULL(34) /* CPU has Common Device Memory Map */
-#define MIPS_CPU_BP_GHIST BIT_ULL(35) /* R12K+ Branch Prediction Global History */
#define MIPS_CPU_SP BIT_ULL(36) /* Small (1KB) page support */
#define MIPS_CPU_FTLB BIT_ULL(37) /* CPU has Fixed-page-size TLB */
#define MIPS_CPU_NAN_LEGACY BIT_ULL(38) /* Legacy NaN implemented */
diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h
index 2bf8f6014579..d85248404c52 100644
--- a/arch/mips/include/asm/futex.h
+++ b/arch/mips/include/asm/futex.h
@@ -21,7 +21,7 @@
#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
{ \
- if (cpu_has_llsc && R10000_LLSC_WAR) { \
+ if (cpu_has_llsc && IS_ENABLED(CONFIG_WAR_R10000_LLSC)) { \
__asm__ __volatile__( \
" .set push \n" \
" .set noat \n" \
@@ -133,7 +133,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
if (!access_ok(uaddr, sizeof(u32)))
return -EFAULT;
- if (cpu_has_llsc && R10000_LLSC_WAR) {
+ if (cpu_has_llsc && IS_ENABLED(CONFIG_WAR_R10000_LLSC)) {
__asm__ __volatile__(
"# futex_atomic_cmpxchg_inatomic \n"
" .set push \n"
diff --git a/arch/mips/include/asm/idle.h b/arch/mips/include/asm/idle.h
index 655a6dbc861a..0992cad9c632 100644
--- a/arch/mips/include/asm/idle.h
+++ b/arch/mips/include/asm/idle.h
@@ -15,6 +15,8 @@ static inline int using_rollback_handler(void)
return cpu_wait == r4k_wait;
}
+extern void __init check_wait(void);
+
extern int mips_cpuidle_wait_enter(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index);
diff --git a/arch/mips/include/asm/llsc.h b/arch/mips/include/asm/llsc.h
index c49738bc3bda..ec09fe5d6d6c 100644
--- a/arch/mips/include/asm/llsc.h
+++ b/arch/mips/include/asm/llsc.h
@@ -28,7 +28,7 @@
* works around a bug present in R10000 CPUs prior to revision 3.0 that could
* cause ll-sc sequences to execute non-atomically.
*/
-#if R10000_LLSC_WAR
+#ifdef CONFIG_WAR_R10000_LLSC
# define __SC_BEQZ "beqzl "
#elif MIPS_ISA_REV >= 6
# define __SC_BEQZ "beqzc "
diff --git a/arch/mips/include/asm/local.h b/arch/mips/include/asm/local.h
index fef0fda8f82f..ecda7295ddcd 100644
--- a/arch/mips/include/asm/local.h
+++ b/arch/mips/include/asm/local.h
@@ -31,7 +31,7 @@ static __inline__ long local_add_return(long i, local_t * l)
{
unsigned long result;
- if (kernel_uses_llsc && R10000_LLSC_WAR) {
+ if (kernel_uses_llsc && IS_ENABLED(CONFIG_WAR_R10000_LLSC)) {
unsigned long temp;
__asm__ __volatile__(
@@ -80,7 +80,7 @@ static __inline__ long local_sub_return(long i, local_t * l)
{
unsigned long result;
- if (kernel_uses_llsc && R10000_LLSC_WAR) {
+ if (kernel_uses_llsc && IS_ENABLED(CONFIG_WAR_R10000_LLSC)) {
unsigned long temp;
__asm__ __volatile__(
diff --git a/arch/mips/include/asm/m48t37.h b/arch/mips/include/asm/m48t37.h
deleted file mode 100644
index 3687a02e692b..000000000000
--- a/arch/mips/include/asm/m48t37.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Registers for the SGS-Thomson M48T37 Timekeeper RAM chip
- */
-#ifndef _ASM_M48T37_H
-#define _ASM_M48T37_H
-
-#include <linux/spinlock.h>
-
-extern spinlock_t rtc_lock;
-
-struct m48t37_rtc {
- volatile u8 pad[0x7ff0]; /* NVRAM */
- volatile u8 flags;
- volatile u8 century;
- volatile u8 alarm_sec;
- volatile u8 alarm_min;
- volatile u8 alarm_hour;
- volatile u8 alarm_data;
- volatile u8 interrupts;
- volatile u8 watchdog;
- volatile u8 control;
- volatile u8 sec;
- volatile u8 min;
- volatile u8 hour;
- volatile u8 day;
- volatile u8 date;
- volatile u8 month;
- volatile u8 year;
-};
-
-#define M48T37_RTC_SET 0x80
-#define M48T37_RTC_STOPPED 0x80
-#define M48T37_RTC_READ 0x40
-
-#endif /* _ASM_M48T37_H */
diff --git a/arch/mips/include/asm/mach-au1x00/cpu-feature-overrides.h b/arch/mips/include/asm/mach-au1x00/cpu-feature-overrides.h
index ecfbb5aeada3..e6e527224a15 100644
--- a/arch/mips/include/asm/mach-au1x00/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-au1x00/cpu-feature-overrides.h
@@ -39,7 +39,6 @@
#define cpu_has_guestctl2 0
#define cpu_has_guestid 0
#define cpu_has_drg 0
-#define cpu_has_bp_ghist 0
#define cpu_has_mips16 0
#define cpu_has_mips16e2 0
#define cpu_has_mdmx 0
diff --git a/arch/mips/include/asm/mach-au1x00/gpio-au1300.h b/arch/mips/include/asm/mach-au1x00/gpio-au1300.h
index d25846a1291f..d16add7ba49d 100644
--- a/arch/mips/include/asm/mach-au1x00/gpio-au1300.h
+++ b/arch/mips/include/asm/mach-au1x00/gpio-au1300.h
@@ -120,141 +120,4 @@ static inline int au1300_gpio_getinitlvl(unsigned int gpio)
return (v >> gpio) & 1;
}
-/**********************************************************************/
-
-/* Linux gpio framework integration.
-*
-* 4 use cases of Alchemy GPIOS:
-*(1) GPIOLIB=y, ALCHEMY_GPIO_INDIRECT=y:
-* Board must register gpiochips.
-*(2) GPIOLIB=y, ALCHEMY_GPIO_INDIRECT=n:
-* A gpiochip for the 75 GPIOs is registered.
-*
-*(3) GPIOLIB=n, ALCHEMY_GPIO_INDIRECT=y:
-* the boards' gpio.h must provide the linux gpio wrapper functions,
-*
-*(4) GPIOLIB=n, ALCHEMY_GPIO_INDIRECT=n:
-* inlinable gpio functions are provided which enable access to the
-* Au1300 gpios only by using the numbers straight out of the data-
-* sheets.
-
-* Cases 1 and 3 are intended for boards which want to provide their own
-* GPIO namespace and -operations (i.e. for example you have 8 GPIOs
-* which are in part provided by spare Au1300 GPIO pins and in part by
-* an external FPGA but you still want them to be accessible in linux
-* as gpio0-7. The board can of course use the alchemy_gpioX_* functions
-* as required).
-*/
-
-#ifndef CONFIG_GPIOLIB
-
-#ifdef CONFIG_ALCHEMY_GPIOINT_AU1300
-
-#ifndef CONFIG_ALCHEMY_GPIO_INDIRECT /* case (4) */
-
-static inline int gpio_direction_input(unsigned int gpio)
-{
- return au1300_gpio_direction_input(gpio);
-}
-
-static inline int gpio_direction_output(unsigned int gpio, int v)
-{
- return au1300_gpio_direction_output(gpio, v);
-}
-
-static inline int gpio_get_value(unsigned int gpio)
-{
- return au1300_gpio_get_value(gpio);
-}
-
-static inline void gpio_set_value(unsigned int gpio, int v)
-{
- au1300_gpio_set_value(gpio, v);
-}
-
-static inline int gpio_get_value_cansleep(unsigned gpio)
-{
- return gpio_get_value(gpio);
-}
-
-static inline void gpio_set_value_cansleep(unsigned gpio, int value)
-{
- gpio_set_value(gpio, value);
-}
-
-static inline int gpio_is_valid(unsigned int gpio)
-{
- return au1300_gpio_is_valid(gpio);
-}
-
-static inline int gpio_cansleep(unsigned int gpio)
-{
- return au1300_gpio_cansleep(gpio);
-}
-
-static inline int gpio_to_irq(unsigned int gpio)
-{
- return au1300_gpio_to_irq(gpio);
-}
-
-static inline int irq_to_gpio(unsigned int irq)
-{
- return au1300_irq_to_gpio(irq);
-}
-
-static inline int gpio_request(unsigned int gpio, const char *label)
-{
- return 0;
-}
-
-static inline int gpio_request_one(unsigned gpio,
- unsigned long flags, const char *label)
-{
- return 0;
-}
-
-static inline int gpio_request_array(struct gpio *array, size_t num)
-{
- return 0;
-}
-
-static inline void gpio_free(unsigned gpio)
-{
-}
-
-static inline void gpio_free_array(struct gpio *array, size_t num)
-{
-}
-
-static inline int gpio_set_debounce(unsigned gpio, unsigned debounce)
-{
- return -ENOSYS;
-}
-
-static inline void gpio_unexport(unsigned gpio)
-{
-}
-
-static inline int gpio_export(unsigned gpio, bool direction_may_change)
-{
- return -ENOSYS;
-}
-
-static inline int gpio_sysfs_set_active_low(unsigned gpio, int value)
-{
- return -ENOSYS;
-}
-
-static inline int gpio_export_link(struct device *dev, const char *name,
- unsigned gpio)
-{
- return -ENOSYS;
-}
-
-#endif /* !CONFIG_ALCHEMY_GPIO_INDIRECT */
-
-#endif /* CONFIG_ALCHEMY_GPIOINT_AU1300 */
-
-#endif /* CONFIG GPIOLIB */
-
#endif /* _GPIO_AU1300_H_ */
diff --git a/arch/mips/include/asm/mach-bcm47xx/bcm47xx.h b/arch/mips/include/asm/mach-bcm47xx/bcm47xx.h
index d7f1ef246d5c..93817bfb7fb2 100644
--- a/arch/mips/include/asm/mach-bcm47xx/bcm47xx.h
+++ b/arch/mips/include/asm/mach-bcm47xx/bcm47xx.h
@@ -10,6 +10,7 @@
#include <linux/bcma/bcma.h>
#include <linux/bcma/bcma_soc.h>
#include <linux/bcm47xx_nvram.h>
+#include <linux/bcm47xx_sprom.h>
enum bcm47xx_bus_type {
#ifdef CONFIG_BCM47XX_SSB
@@ -32,9 +33,6 @@ union bcm47xx_bus {
extern union bcm47xx_bus bcm47xx_bus;
extern enum bcm47xx_bus_type bcm47xx_bus_type;
-void bcm47xx_fill_sprom(struct ssb_sprom *sprom, const char *prefix,
- bool fallback);
-
void bcm47xx_set_system_type(u16 chip_id);
#endif /* __ASM_BCM47XX_H */
diff --git a/arch/mips/include/asm/mach-cavium-octeon/war.h b/arch/mips/include/asm/mach-cavium-octeon/war.h
deleted file mode 100644
index 2421411b7636..000000000000
--- a/arch/mips/include/asm/mach-cavium-octeon/war.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- * Copyright (C) 2008 Cavium Networks <support@caviumnetworks.com>
- */
-#ifndef __ASM_MIPS_MACH_CAVIUM_OCTEON_WAR_H
-#define __ASM_MIPS_MACH_CAVIUM_OCTEON_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR 0
-#define R4600_V1_HIT_CACHEOP_WAR 0
-#define R4600_V2_HIT_CACHEOP_WAR 0
-#define BCM1250_M3_WAR 0
-#define SIBYTE_1956_WAR 0
-#define MIPS4K_ICACHE_REFILL_WAR 0
-#define MIPS_CACHE_SYNC_WAR 0
-#define TX49XX_ICACHE_INDEX_INV_WAR 0
-#define ICACHE_REFILLS_WORKAROUND_WAR 0
-#define R10000_LLSC_WAR 0
-#define MIPS34K_MISSED_ITLB_WAR 0
-
-#define CAVIUM_OCTEON_DCACHE_PREFETCH_WAR \
- OCTEON_IS_MODEL(OCTEON_CN6XXX)
-
-#endif /* __ASM_MIPS_MACH_CAVIUM_OCTEON_WAR_H */
diff --git a/arch/mips/include/asm/mach-generic/irq.h b/arch/mips/include/asm/mach-generic/irq.h
index 72ac2c202c55..079889ced4f3 100644
--- a/arch/mips/include/asm/mach-generic/irq.h
+++ b/arch/mips/include/asm/mach-generic/irq.h
@@ -9,7 +9,7 @@
#define __ASM_MACH_GENERIC_IRQ_H
#ifndef NR_IRQS
-#define NR_IRQS 128
+#define NR_IRQS 256
#endif
#ifdef CONFIG_I8259
diff --git a/arch/mips/include/asm/mach-generic/war.h b/arch/mips/include/asm/mach-generic/war.h
deleted file mode 100644
index f0f4a35d0870..000000000000
--- a/arch/mips/include/asm/mach-generic/war.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MACH_GENERIC_WAR_H
-#define __ASM_MACH_GENERIC_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR 0
-#define R4600_V1_HIT_CACHEOP_WAR 0
-#define R4600_V2_HIT_CACHEOP_WAR 0
-#define BCM1250_M3_WAR 0
-#define SIBYTE_1956_WAR 0
-#define MIPS4K_ICACHE_REFILL_WAR 0
-#define MIPS_CACHE_SYNC_WAR 0
-#define TX49XX_ICACHE_INDEX_INV_WAR 0
-#define ICACHE_REFILLS_WORKAROUND_WAR 0
-#define R10000_LLSC_WAR 0
-#define MIPS34K_MISSED_ITLB_WAR 0
-
-#endif /* __ASM_MACH_GENERIC_WAR_H */
diff --git a/arch/mips/include/asm/mach-jz4740/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ingenic/cpu-feature-overrides.h
index 7c5e576f9d96..7c5e576f9d96 100644
--- a/arch/mips/include/asm/mach-jz4740/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-ingenic/cpu-feature-overrides.h
diff --git a/arch/mips/include/asm/mach-ip22/war.h b/arch/mips/include/asm/mach-ip22/war.h
deleted file mode 100644
index b48eb4ac362d..000000000000
--- a/arch/mips/include/asm/mach-ip22/war.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_IP22_WAR_H
-#define __ASM_MIPS_MACH_IP22_WAR_H
-
-/*
- * R4600 CPU modules for the Indy come with both V1.7 and V2.0 processors.
- */
-
-#define R4600_V1_INDEX_ICACHEOP_WAR 1
-#define R4600_V1_HIT_CACHEOP_WAR 1
-#define R4600_V2_HIT_CACHEOP_WAR 1
-#define BCM1250_M3_WAR 0
-#define SIBYTE_1956_WAR 0
-#define MIPS4K_ICACHE_REFILL_WAR 0
-#define MIPS_CACHE_SYNC_WAR 0
-#define TX49XX_ICACHE_INDEX_INV_WAR 0
-#define ICACHE_REFILLS_WORKAROUND_WAR 0
-#define R10000_LLSC_WAR 0
-#define MIPS34K_MISSED_ITLB_WAR 0
-
-#endif /* __ASM_MIPS_MACH_IP22_WAR_H */
diff --git a/arch/mips/include/asm/mach-ip27/kmalloc.h b/arch/mips/include/asm/mach-ip27/kmalloc.h
deleted file mode 100644
index 82c23ce2afa7..000000000000
--- a/arch/mips/include/asm/mach-ip27/kmalloc.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef __ASM_MACH_IP27_KMALLOC_H
-#define __ASM_MACH_IP27_KMALLOC_H
-
-/*
- * All happy, no need to define ARCH_DMA_MINALIGN
- */
-
-#endif /* __ASM_MACH_IP27_KMALLOC_H */
diff --git a/arch/mips/include/asm/mach-ip27/war.h b/arch/mips/include/asm/mach-ip27/war.h
deleted file mode 100644
index ef3efce0094a..000000000000
--- a/arch/mips/include/asm/mach-ip27/war.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_IP27_WAR_H
-#define __ASM_MIPS_MACH_IP27_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR 0
-#define R4600_V1_HIT_CACHEOP_WAR 0
-#define R4600_V2_HIT_CACHEOP_WAR 0
-#define BCM1250_M3_WAR 0
-#define SIBYTE_1956_WAR 0
-#define MIPS4K_ICACHE_REFILL_WAR 0
-#define MIPS_CACHE_SYNC_WAR 0
-#define TX49XX_ICACHE_INDEX_INV_WAR 0
-#define ICACHE_REFILLS_WORKAROUND_WAR 0
-#define R10000_LLSC_WAR 1
-#define MIPS34K_MISSED_ITLB_WAR 0
-
-#endif /* __ASM_MIPS_MACH_IP27_WAR_H */
diff --git a/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h
index ba8b4e30b3e2..613bbc10c1f2 100644
--- a/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h
@@ -25,7 +25,7 @@
#define cpu_has_mcheck 0
#define cpu_has_ejtag 0
-#define cpu_has_llsc 1
+#define cpu_has_llsc 0
#define cpu_has_vtag_icache 0
#define cpu_has_dc_aliases 0 /* see probe_pcache() */
#define cpu_has_ic_fills_f_dc 0
diff --git a/arch/mips/include/asm/mach-ip28/war.h b/arch/mips/include/asm/mach-ip28/war.h
deleted file mode 100644
index 61cd67354829..000000000000
--- a/arch/mips/include/asm/mach-ip28/war.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_IP28_WAR_H
-#define __ASM_MIPS_MACH_IP28_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR 0
-#define R4600_V1_HIT_CACHEOP_WAR 0
-#define R4600_V2_HIT_CACHEOP_WAR 0
-#define BCM1250_M3_WAR 0
-#define SIBYTE_1956_WAR 0
-#define MIPS4K_ICACHE_REFILL_WAR 0
-#define MIPS_CACHE_SYNC_WAR 0
-#define TX49XX_ICACHE_INDEX_INV_WAR 0
-#define ICACHE_REFILLS_WORKAROUND_WAR 0
-#define R10000_LLSC_WAR 1
-#define MIPS34K_MISSED_ITLB_WAR 0
-
-#endif /* __ASM_MIPS_MACH_IP28_WAR_H */
diff --git a/arch/mips/include/asm/mach-ip30/irq.h b/arch/mips/include/asm/mach-ip30/irq.h
deleted file mode 100644
index 27ba899c95be..000000000000
--- a/arch/mips/include/asm/mach-ip30/irq.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * HEART IRQ defines
- *
- * Copyright (C) 2009 Johannes Dickgreber <tanzy@gmx.de>
- * 2014-2016 Joshua Kinard <kumba@gentoo.org>
- *
- */
-
-#ifndef __ASM_MACH_IP30_IRQ_H
-#define __ASM_MACH_IP30_IRQ_H
-
-/*
- * HEART has 64 hardware interrupts, but use 128 to leave room for a few
- * software interrupts as well (such as the CPU timer interrupt.
- */
-#define NR_IRQS 128
-
-extern void __init ip30_install_ipi(void);
-
-/*
- * HEART has 64 interrupt vectors available to it, subdivided into five
- * priority levels. They are numbered 0 to 63.
- */
-#define HEART_NUM_IRQS 64
-
-/*
- * These are the five interrupt priority levels and their corresponding
- * CPU IPx interrupt pins.
- *
- * Level 4 - Error Interrupts.
- * Level 3 - HEART timer interrupt.
- * Level 2 - CPU IPI, CPU debug, power putton, general device interrupts.
- * Level 1 - General device interrupts.
- * Level 0 - General device GFX flow control interrupts.
- */
-#define HEART_L4_INT_MASK 0xfff8000000000000ULL /* IP6 */
-#define HEART_L3_INT_MASK 0x0004000000000000ULL /* IP5 */
-#define HEART_L2_INT_MASK 0x0003ffff00000000ULL /* IP4 */
-#define HEART_L1_INT_MASK 0x00000000ffff0000ULL /* IP3 */
-#define HEART_L0_INT_MASK 0x000000000000ffffULL /* IP2 */
-
-/* HEART L0 Interrupts (Low Priority) */
-#define HEART_L0_INT_GENERIC 0
-#define HEART_L0_INT_FLOW_CTRL_HWTR_0 1
-#define HEART_L0_INT_FLOW_CTRL_HWTR_1 2
-
-/* HEART L2 Interrupts (High Priority) */
-#define HEART_L2_INT_RESCHED_CPU_0 46
-#define HEART_L2_INT_RESCHED_CPU_1 47
-#define HEART_L2_INT_CALL_CPU_0 48
-#define HEART_L2_INT_CALL_CPU_1 49
-
-/* HEART L3 Interrupts (Compare/Counter Timer) */
-#define HEART_L3_INT_TIMER 50
-
-/* HEART L4 Interrupts (Errors) */
-#define HEART_L4_INT_XWID_ERR_9 51
-#define HEART_L4_INT_XWID_ERR_A 52
-#define HEART_L4_INT_XWID_ERR_B 53
-#define HEART_L4_INT_XWID_ERR_C 54
-#define HEART_L4_INT_XWID_ERR_D 55
-#define HEART_L4_INT_XWID_ERR_E 56
-#define HEART_L4_INT_XWID_ERR_F 57
-#define HEART_L4_INT_XWID_ERR_XBOW 58
-#define HEART_L4_INT_CPU_BUS_ERR_0 59
-#define HEART_L4_INT_CPU_BUS_ERR_1 60
-#define HEART_L4_INT_CPU_BUS_ERR_2 61
-#define HEART_L4_INT_CPU_BUS_ERR_3 62
-#define HEART_L4_INT_HEART_EXCP 63
-
-/*
- * Power Switch is wired via BaseIO BRIDGE slot #6.
- *
- * ACFail is wired via BaseIO BRIDGE slot #7.
- */
-#define IP30_POWER_IRQ HEART_L2_INT_POWER_BTN
-
-#include <asm/mach-generic/irq.h>
-
-#define IP30_HEART_L0_IRQ (MIPS_CPU_IRQ_BASE + 2)
-#define IP30_HEART_L1_IRQ (MIPS_CPU_IRQ_BASE + 3)
-#define IP30_HEART_L2_IRQ (MIPS_CPU_IRQ_BASE + 4)
-#define IP30_HEART_TIMER_IRQ (MIPS_CPU_IRQ_BASE + 5)
-#define IP30_HEART_ERR_IRQ (MIPS_CPU_IRQ_BASE + 6)
-
-#endif /* __ASM_MACH_IP30_IRQ_H */
diff --git a/arch/mips/include/asm/mach-ip30/war.h b/arch/mips/include/asm/mach-ip30/war.h
deleted file mode 100644
index a1fa0c1f5300..000000000000
--- a/arch/mips/include/asm/mach-ip30/war.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_IP30_WAR_H
-#define __ASM_MIPS_MACH_IP30_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR 0
-#define R4600_V1_HIT_CACHEOP_WAR 0
-#define R4600_V2_HIT_CACHEOP_WAR 0
-#define BCM1250_M3_WAR 0
-#define SIBYTE_1956_WAR 0
-#define MIPS4K_ICACHE_REFILL_WAR 0
-#define MIPS_CACHE_SYNC_WAR 0
-#define TX49XX_ICACHE_INDEX_INV_WAR 0
-#define ICACHE_REFILLS_WORKAROUND_WAR 0
-#ifdef CONFIG_CPU_R10000
-#define R10000_LLSC_WAR 1
-#else
-#define R10000_LLSC_WAR 0
-#endif
-#define MIPS34K_MISSED_ITLB_WAR 0
-
-#endif /* __ASM_MIPS_MACH_IP30_WAR_H */
diff --git a/arch/mips/include/asm/mach-ip32/war.h b/arch/mips/include/asm/mach-ip32/war.h
deleted file mode 100644
index e77b9d1b6c96..000000000000
--- a/arch/mips/include/asm/mach-ip32/war.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_IP32_WAR_H
-#define __ASM_MIPS_MACH_IP32_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR 0
-#define R4600_V1_HIT_CACHEOP_WAR 0
-#define R4600_V2_HIT_CACHEOP_WAR 0
-#define BCM1250_M3_WAR 0
-#define SIBYTE_1956_WAR 0
-#define MIPS4K_ICACHE_REFILL_WAR 0
-#define MIPS_CACHE_SYNC_WAR 0
-#define TX49XX_ICACHE_INDEX_INV_WAR 0
-#define ICACHE_REFILLS_WORKAROUND_WAR 1
-#define R10000_LLSC_WAR 0
-#define MIPS34K_MISSED_ITLB_WAR 0
-
-#endif /* __ASM_MIPS_MACH_IP32_WAR_H */
diff --git a/arch/mips/include/asm/mach-jz4740/irq.h b/arch/mips/include/asm/mach-jz4740/irq.h
deleted file mode 100644
index 27c543bd340f..000000000000
--- a/arch/mips/include/asm/mach-jz4740/irq.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (C) 2009-2010, Lars-Peter Clausen <lars@metafoo.de>
- * JZ4740 IRQ definitions
- */
-
-#ifndef __ASM_MACH_JZ4740_IRQ_H__
-#define __ASM_MACH_JZ4740_IRQ_H__
-
-#define MIPS_CPU_IRQ_BASE 0
-#define NR_IRQS 256
-
-#endif
diff --git a/arch/mips/include/asm/mach-loongson2ef/mc146818rtc.h b/arch/mips/include/asm/mach-loongson2ef/mc146818rtc.h
deleted file mode 100644
index 00d602629a55..000000000000
--- a/arch/mips/include/asm/mach-loongson2ef/mc146818rtc.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1998, 2001, 03, 07 by Ralf Baechle (ralf@linux-mips.org)
- *
- * RTC routines for PC style attached Dallas chip.
- */
-#ifndef __ASM_MACH_LOONGSON2EF_MC146818RTC_H
-#define __ASM_MACH_LOONGSON2EF_MC146818RTC_H
-
-#include <linux/io.h>
-
-#define RTC_PORT(x) (0x70 + (x))
-#define RTC_IRQ 8
-
-static inline unsigned char CMOS_READ(unsigned long addr)
-{
- outb_p(addr, RTC_PORT(0));
- return inb_p(RTC_PORT(1));
-}
-
-static inline void CMOS_WRITE(unsigned char data, unsigned long addr)
-{
- outb_p(addr, RTC_PORT(0));
- outb_p(data, RTC_PORT(1));
-}
-
-#define RTC_ALWAYS_BCD 0
-
-#ifndef mc146818_decode_year
-#define mc146818_decode_year(year) ((year) < 70 ? (year) + 2000 : (year) + 1970)
-#endif
-
-#endif /* __ASM_MACH_LOONGSON2EF_MC146818RTC_H */
diff --git a/arch/mips/include/asm/mach-loongson64/irq.h b/arch/mips/include/asm/mach-loongson64/irq.h
index bf2480923154..98ea977cf0b8 100644
--- a/arch/mips/include/asm/mach-loongson64/irq.h
+++ b/arch/mips/include/asm/mach-loongson64/irq.h
@@ -5,7 +5,8 @@
/* cpu core interrupt numbers */
#define NR_IRQS_LEGACY 16
#define NR_MIPS_CPU_IRQS 8
-#define NR_IRQS (NR_IRQS_LEGACY + NR_MIPS_CPU_IRQS + 256)
+#define NR_MAX_CHAINED_IRQS 40 /* Chained IRQs means those not directly used by devices */
+#define NR_IRQS (NR_IRQS_LEGACY + NR_MIPS_CPU_IRQS + NR_MAX_CHAINED_IRQS + 256)
#define MIPS_CPU_IRQ_BASE NR_IRQS_LEGACY
diff --git a/arch/mips/include/asm/mach-loongson64/mmzone.h b/arch/mips/include/asm/mach-loongson64/mmzone.h
index 5eaca4fe3f92..ebb1deaa77b9 100644
--- a/arch/mips/include/asm/mach-loongson64/mmzone.h
+++ b/arch/mips/include/asm/mach-loongson64/mmzone.h
@@ -10,13 +10,9 @@
#define _ASM_MACH_LOONGSON64_MMZONE_H
#define NODE_ADDRSPACE_SHIFT 44
-#define NODE0_ADDRSPACE_OFFSET 0x000000000000UL
-#define NODE1_ADDRSPACE_OFFSET 0x100000000000UL
-#define NODE2_ADDRSPACE_OFFSET 0x200000000000UL
-#define NODE3_ADDRSPACE_OFFSET 0x300000000000UL
#define pa_to_nid(addr) (((addr) & 0xf00000000000) >> NODE_ADDRSPACE_SHIFT)
-#define nid_to_addrbase(nid) ((nid) << NODE_ADDRSPACE_SHIFT)
+#define nid_to_addrbase(nid) ((unsigned long)(nid) << NODE_ADDRSPACE_SHIFT)
extern struct pglist_data *__node_data[];
diff --git a/arch/mips/include/asm/mach-malta/malta-dtshim.h b/arch/mips/include/asm/mach-malta/malta-dtshim.h
deleted file mode 100644
index 7c97b710121d..000000000000
--- a/arch/mips/include/asm/mach-malta/malta-dtshim.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (C) 2015 Imagination Technologies
- * Author: Paul Burton <paul.burton@mips.com>
- */
-
-#ifndef __MIPS_MALTA_DTSHIM_H__
-#define __MIPS_MALTA_DTSHIM_H__
-
-#include <linux/init.h>
-
-#ifdef CONFIG_MIPS_MALTA
-
-extern void __init *malta_dt_shim(void *fdt);
-
-#else /* !CONFIG_MIPS_MALTA */
-
-static inline void *malta_dt_shim(void *fdt)
-{
- return fdt;
-}
-
-#endif /* !CONFIG_MIPS_MALTA */
-
-#endif /* __MIPS_MALTA_DTSHIM_H__ */
diff --git a/arch/mips/include/asm/mach-malta/malta-pm.h b/arch/mips/include/asm/mach-malta/malta-pm.h
deleted file mode 100644
index 2a5146d79313..000000000000
--- a/arch/mips/include/asm/mach-malta/malta-pm.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (C) 2014 Imagination Technologies
- * Author: Paul Burton <paul.burton@mips.com>
- */
-
-#ifndef __ASM_MIPS_MACH_MALTA_PM_H__
-#define __ASM_MIPS_MACH_MALTA_PM_H__
-
-#include <asm/mips-boards/piix4.h>
-
-#ifdef CONFIG_MIPS_MALTA_PM
-
-/**
- * mips_pm_suspend - enter a suspend state
- * @state: the state to enter, one of PIIX4_FUNC3IO_PMCNTRL_SUS_TYP_*
- *
- * Enters a suspend state via the Malta's PIIX4. If the state to be entered
- * is one which loses context (eg. SOFF) then this function will never
- * return.
- */
-extern int mips_pm_suspend(unsigned state);
-
-#else /* !CONFIG_MIPS_MALTA_PM */
-
-static inline int mips_pm_suspend(unsigned state)
-{
- return -EINVAL;
-}
-
-#endif /* !CONFIG_MIPS_MALTA_PM */
-
-#endif /* __ASM_MIPS_MACH_MALTA_PM_H__ */
diff --git a/arch/mips/include/asm/mach-malta/war.h b/arch/mips/include/asm/mach-malta/war.h
deleted file mode 100644
index d62d2ffe515e..000000000000
--- a/arch/mips/include/asm/mach-malta/war.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_MIPS_WAR_H
-#define __ASM_MIPS_MACH_MIPS_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR 0
-#define R4600_V1_HIT_CACHEOP_WAR 0
-#define R4600_V2_HIT_CACHEOP_WAR 0
-#define BCM1250_M3_WAR 0
-#define SIBYTE_1956_WAR 0
-#define MIPS4K_ICACHE_REFILL_WAR 1
-#define MIPS_CACHE_SYNC_WAR 1
-#define TX49XX_ICACHE_INDEX_INV_WAR 0
-#define ICACHE_REFILLS_WORKAROUND_WAR 1
-#define R10000_LLSC_WAR 0
-#define MIPS34K_MISSED_ITLB_WAR 0
-
-#endif /* __ASM_MIPS_MACH_MIPS_WAR_H */
diff --git a/arch/mips/include/asm/mach-paravirt/cpu-feature-overrides.h b/arch/mips/include/asm/mach-paravirt/cpu-feature-overrides.h
deleted file mode 100644
index 23ecf816daa7..000000000000
--- a/arch/mips/include/asm/mach-paravirt/cpu-feature-overrides.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2013 Cavium, Inc.
- */
-#ifndef __ASM_MACH_PARAVIRT_CPU_FEATURE_OVERRIDES_H
-#define __ASM_MACH_PARAVIRT_CPU_FEATURE_OVERRIDES_H
-
-#define cpu_has_4kex 1
-#define cpu_has_3k_cache 0
-#define cpu_has_tx39_cache 0
-#define cpu_has_counter 1
-#define cpu_has_llsc 1
-/*
- * We Disable LL/SC on non SMP systems as it is faster to disable
- * interrupts for atomic access than a LL/SC.
- */
-#ifdef CONFIG_SMP
-# define kernel_uses_llsc 1
-#else
-# define kernel_uses_llsc 0
-#endif
-
-#ifdef CONFIG_CPU_CAVIUM_OCTEON
-#define cpu_dcache_line_size() 128
-#define cpu_icache_line_size() 128
-#define cpu_has_octeon_cache 1
-#define cpu_has_4k_cache 0
-#else
-#define cpu_has_4k_cache 1
-#endif
-
-#endif /* __ASM_MACH_PARAVIRT_CPU_FEATURE_OVERRIDES_H */
diff --git a/arch/mips/include/asm/mach-paravirt/irq.h b/arch/mips/include/asm/mach-paravirt/irq.h
deleted file mode 100644
index 9b4d35eca977..000000000000
--- a/arch/mips/include/asm/mach-paravirt/irq.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2013 Cavium, Inc.
- */
-#ifndef __ASM_MACH_PARAVIRT_IRQ_H__
-#define __ASM_MACH_PARAVIRT_IRQ_H__
-
-#define NR_IRQS 64
-#define MIPS_CPU_IRQ_BASE 1
-
-#define MIPS_IRQ_PCIA (MIPS_CPU_IRQ_BASE + 8)
-
-#define MIPS_IRQ_MBOX0 (MIPS_CPU_IRQ_BASE + 32)
-#define MIPS_IRQ_MBOX1 (MIPS_CPU_IRQ_BASE + 33)
-
-#endif /* __ASM_MACH_PARAVIRT_IRQ_H__ */
diff --git a/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h b/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h
deleted file mode 100644
index c9f5769dfc8f..000000000000
--- a/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2013 Cavium, Inc
- */
-#ifndef __ASM_MACH_PARAVIRT_KERNEL_ENTRY_H
-#define __ASM_MACH_PARAVIRT_KERNEL_ENTRY_H
-
-#define CP0_EBASE $15, 1
-
- .macro kernel_entry_setup
-#ifdef CONFIG_SMP
- mfc0 t0, CP0_EBASE
- andi t0, t0, 0x3ff # CPUNum
- beqz t0, 1f
- # CPUs other than zero goto smp_bootstrap
- j smp_bootstrap
-#endif /* CONFIG_SMP */
-
-1:
- .endm
-
-/*
- * Do SMP slave processor setup necessary before we can safely execute
- * C code.
- */
- .macro smp_slave_setup
- mfc0 t0, CP0_EBASE
- andi t0, t0, 0x3ff # CPUNum
- slti t1, t0, NR_CPUS
- bnez t1, 1f
-2:
- di
- wait
- b 2b # Unknown CPU, loop forever.
-1:
- PTR_LA t1, paravirt_smp_sp
- PTR_SLL t0, PTR_SCALESHIFT
- PTR_ADDU t1, t1, t0
-3:
- PTR_L sp, 0(t1)
- beqz sp, 3b # Spin until told to proceed.
-
- PTR_LA t1, paravirt_smp_gp
- PTR_ADDU t1, t1, t0
- sync
- PTR_L gp, 0(t1)
- .endm
-
-#endif /* __ASM_MACH_PARAVIRT_KERNEL_ENTRY_H */
diff --git a/arch/mips/include/asm/mach-pnx833x/gpio.h b/arch/mips/include/asm/mach-pnx833x/gpio.h
deleted file mode 100644
index 85b5b8e26118..000000000000
--- a/arch/mips/include/asm/mach-pnx833x/gpio.h
+++ /dev/null
@@ -1,159 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * gpio.h: GPIO Support for PNX833X.
- *
- * Copyright 2008 NXP Semiconductors
- * Chris Steel <chris.steel@nxp.com>
- * Daniel Laird <daniel.j.laird@nxp.com>
- */
-#ifndef __ASM_MIPS_MACH_PNX833X_GPIO_H
-#define __ASM_MIPS_MACH_PNX833X_GPIO_H
-
-/* BIG FAT WARNING: races danger!
- No protections exist here. Current users are only early init code,
- when locking is not needed because no concurrency yet exists there,
- and GPIO IRQ dispatcher, which does locking.
- However, if many uses will ever happen, proper locking will be needed
- - including locking between different uses
-*/
-
-#include <asm/mach-pnx833x/pnx833x.h>
-
-#define SET_REG_BIT(reg, bit) do { (reg |= (1 << (bit))); } while (0)
-#define CLEAR_REG_BIT(reg, bit) do { (reg &= ~(1 << (bit))); } while (0)
-
-/* Initialize GPIO to a known state */
-static inline void pnx833x_gpio_init(void)
-{
- PNX833X_PIO_DIR = 0;
- PNX833X_PIO_DIR2 = 0;
- PNX833X_PIO_SEL = 0;
- PNX833X_PIO_SEL2 = 0;
- PNX833X_PIO_INT_EDGE = 0;
- PNX833X_PIO_INT_HI = 0;
- PNX833X_PIO_INT_LO = 0;
-
- /* clear any GPIO interrupt requests */
- PNX833X_PIO_INT_CLEAR = 0xffff;
- PNX833X_PIO_INT_CLEAR = 0;
- PNX833X_PIO_INT_ENABLE = 0;
-}
-
-/* Select GPIO direction for a pin */
-static inline void pnx833x_gpio_select_input(unsigned int pin)
-{
- if (pin < 32)
- CLEAR_REG_BIT(PNX833X_PIO_DIR, pin);
- else
- CLEAR_REG_BIT(PNX833X_PIO_DIR2, pin & 31);
-}
-static inline void pnx833x_gpio_select_output(unsigned int pin)
-{
- if (pin < 32)
- SET_REG_BIT(PNX833X_PIO_DIR, pin);
- else
- SET_REG_BIT(PNX833X_PIO_DIR2, pin & 31);
-}
-
-/* Select GPIO or alternate function for a pin */
-static inline void pnx833x_gpio_select_function_io(unsigned int pin)
-{
- if (pin < 32)
- CLEAR_REG_BIT(PNX833X_PIO_SEL, pin);
- else
- CLEAR_REG_BIT(PNX833X_PIO_SEL2, pin & 31);
-}
-static inline void pnx833x_gpio_select_function_alt(unsigned int pin)
-{
- if (pin < 32)
- SET_REG_BIT(PNX833X_PIO_SEL, pin);
- else
- SET_REG_BIT(PNX833X_PIO_SEL2, pin & 31);
-}
-
-/* Read GPIO pin */
-static inline int pnx833x_gpio_read(unsigned int pin)
-{
- if (pin < 32)
- return (PNX833X_PIO_IN >> pin) & 1;
- else
- return (PNX833X_PIO_IN2 >> (pin & 31)) & 1;
-}
-
-/* Write GPIO pin */
-static inline void pnx833x_gpio_write(unsigned int val, unsigned int pin)
-{
- if (pin < 32) {
- if (val)
- SET_REG_BIT(PNX833X_PIO_OUT, pin);
- else
- CLEAR_REG_BIT(PNX833X_PIO_OUT, pin);
- } else {
- if (val)
- SET_REG_BIT(PNX833X_PIO_OUT2, pin & 31);
- else
- CLEAR_REG_BIT(PNX833X_PIO_OUT2, pin & 31);
- }
-}
-
-/* Configure GPIO interrupt */
-#define GPIO_INT_NONE 0
-#define GPIO_INT_LEVEL_LOW 1
-#define GPIO_INT_LEVEL_HIGH 2
-#define GPIO_INT_EDGE_RISING 3
-#define GPIO_INT_EDGE_FALLING 4
-#define GPIO_INT_EDGE_BOTH 5
-static inline void pnx833x_gpio_setup_irq(int when, unsigned int pin)
-{
- switch (when) {
- case GPIO_INT_LEVEL_LOW:
- CLEAR_REG_BIT(PNX833X_PIO_INT_EDGE, pin);
- CLEAR_REG_BIT(PNX833X_PIO_INT_HI, pin);
- SET_REG_BIT(PNX833X_PIO_INT_LO, pin);
- break;
- case GPIO_INT_LEVEL_HIGH:
- CLEAR_REG_BIT(PNX833X_PIO_INT_EDGE, pin);
- SET_REG_BIT(PNX833X_PIO_INT_HI, pin);
- CLEAR_REG_BIT(PNX833X_PIO_INT_LO, pin);
- break;
- case GPIO_INT_EDGE_RISING:
- SET_REG_BIT(PNX833X_PIO_INT_EDGE, pin);
- SET_REG_BIT(PNX833X_PIO_INT_HI, pin);
- CLEAR_REG_BIT(PNX833X_PIO_INT_LO, pin);
- break;
- case GPIO_INT_EDGE_FALLING:
- SET_REG_BIT(PNX833X_PIO_INT_EDGE, pin);
- CLEAR_REG_BIT(PNX833X_PIO_INT_HI, pin);
- SET_REG_BIT(PNX833X_PIO_INT_LO, pin);
- break;
- case GPIO_INT_EDGE_BOTH:
- SET_REG_BIT(PNX833X_PIO_INT_EDGE, pin);
- SET_REG_BIT(PNX833X_PIO_INT_HI, pin);
- SET_REG_BIT(PNX833X_PIO_INT_LO, pin);
- break;
- default:
- CLEAR_REG_BIT(PNX833X_PIO_INT_EDGE, pin);
- CLEAR_REG_BIT(PNX833X_PIO_INT_HI, pin);
- CLEAR_REG_BIT(PNX833X_PIO_INT_LO, pin);
- break;
- }
-}
-
-/* Enable/disable GPIO interrupt */
-static inline void pnx833x_gpio_enable_irq(unsigned int pin)
-{
- SET_REG_BIT(PNX833X_PIO_INT_ENABLE, pin);
-}
-static inline void pnx833x_gpio_disable_irq(unsigned int pin)
-{
- CLEAR_REG_BIT(PNX833X_PIO_INT_ENABLE, pin);
-}
-
-/* Clear GPIO interrupt request */
-static inline void pnx833x_gpio_clear_irq(unsigned int pin)
-{
- SET_REG_BIT(PNX833X_PIO_INT_CLEAR, pin);
- CLEAR_REG_BIT(PNX833X_PIO_INT_CLEAR, pin);
-}
-
-#endif
diff --git a/arch/mips/include/asm/mach-pnx833x/irq-mapping.h b/arch/mips/include/asm/mach-pnx833x/irq-mapping.h
deleted file mode 100644
index 32d8063c1bbc..000000000000
--- a/arch/mips/include/asm/mach-pnx833x/irq-mapping.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-
-/*
- * irq.h: IRQ mappings for PNX833X.
- *
- * Copyright 2008 NXP Semiconductors
- * Chris Steel <chris.steel@nxp.com>
- * Daniel Laird <daniel.j.laird@nxp.com>
- */
-
-#ifndef __ASM_MIPS_MACH_PNX833X_IRQ_MAPPING_H
-#define __ASM_MIPS_MACH_PNX833X_IRQ_MAPPING_H
-/*
- * The "IRQ numbers" are completely virtual.
- *
- * In PNX8330/1, we have 48 interrupt lines, numbered from 1 to 48.
- * Let's use numbers 1..48 for PIC interrupts, number 0 for timer interrupt,
- * numbers 49..64 for (virtual) GPIO interrupts.
- *
- * In PNX8335, we have 57 interrupt lines, numbered from 1 to 57,
- * connected to PIC, which uses core hardware interrupt 2, and also
- * a timer interrupt through hardware interrupt 5.
- * Let's use numbers 1..64 for PIC interrupts, number 0 for timer interrupt,
- * numbers 65..80 for (virtual) GPIO interrupts.
- *
- */
-#include <irq.h>
-
-#define PNX833X_TIMER_IRQ (MIPS_CPU_IRQ_BASE + 7)
-
-/* Interrupts supported by PIC */
-#define PNX833X_PIC_I2C0_INT (PNX833X_PIC_IRQ_BASE + 1)
-#define PNX833X_PIC_I2C1_INT (PNX833X_PIC_IRQ_BASE + 2)
-#define PNX833X_PIC_UART0_INT (PNX833X_PIC_IRQ_BASE + 3)
-#define PNX833X_PIC_UART1_INT (PNX833X_PIC_IRQ_BASE + 4)
-#define PNX833X_PIC_TS_IN0_DV_INT (PNX833X_PIC_IRQ_BASE + 5)
-#define PNX833X_PIC_TS_IN0_DMA_INT (PNX833X_PIC_IRQ_BASE + 6)
-#define PNX833X_PIC_GPIO_INT (PNX833X_PIC_IRQ_BASE + 7)
-#define PNX833X_PIC_AUDIO_DEC_INT (PNX833X_PIC_IRQ_BASE + 8)
-#define PNX833X_PIC_VIDEO_DEC_INT (PNX833X_PIC_IRQ_BASE + 9)
-#define PNX833X_PIC_CONFIG_INT (PNX833X_PIC_IRQ_BASE + 10)
-#define PNX833X_PIC_AOI_INT (PNX833X_PIC_IRQ_BASE + 11)
-#define PNX833X_PIC_SYNC_INT (PNX833X_PIC_IRQ_BASE + 12)
-#define PNX8330_PIC_SPU_INT (PNX833X_PIC_IRQ_BASE + 13)
-#define PNX8335_PIC_SATA_INT (PNX833X_PIC_IRQ_BASE + 13)
-#define PNX833X_PIC_OSD_INT (PNX833X_PIC_IRQ_BASE + 14)
-#define PNX833X_PIC_DISP1_INT (PNX833X_PIC_IRQ_BASE + 15)
-#define PNX833X_PIC_DEINTERLACER_INT (PNX833X_PIC_IRQ_BASE + 16)
-#define PNX833X_PIC_DISPLAY2_INT (PNX833X_PIC_IRQ_BASE + 17)
-#define PNX833X_PIC_VC_INT (PNX833X_PIC_IRQ_BASE + 18)
-#define PNX833X_PIC_SC_INT (PNX833X_PIC_IRQ_BASE + 19)
-#define PNX833X_PIC_IDE_INT (PNX833X_PIC_IRQ_BASE + 20)
-#define PNX833X_PIC_IDE_DMA_INT (PNX833X_PIC_IRQ_BASE + 21)
-#define PNX833X_PIC_TS_IN1_DV_INT (PNX833X_PIC_IRQ_BASE + 22)
-#define PNX833X_PIC_TS_IN1_DMA_INT (PNX833X_PIC_IRQ_BASE + 23)
-#define PNX833X_PIC_SGDX_DMA_INT (PNX833X_PIC_IRQ_BASE + 24)
-#define PNX833X_PIC_TS_OUT_INT (PNX833X_PIC_IRQ_BASE + 25)
-#define PNX833X_PIC_IR_INT (PNX833X_PIC_IRQ_BASE + 26)
-#define PNX833X_PIC_VMSP1_INT (PNX833X_PIC_IRQ_BASE + 27)
-#define PNX833X_PIC_VMSP2_INT (PNX833X_PIC_IRQ_BASE + 28)
-#define PNX833X_PIC_PIBC_INT (PNX833X_PIC_IRQ_BASE + 29)
-#define PNX833X_PIC_TS_IN0_TRD_INT (PNX833X_PIC_IRQ_BASE + 30)
-#define PNX833X_PIC_SGDX_TPD_INT (PNX833X_PIC_IRQ_BASE + 31)
-#define PNX833X_PIC_USB_INT (PNX833X_PIC_IRQ_BASE + 32)
-#define PNX833X_PIC_TS_IN1_TRD_INT (PNX833X_PIC_IRQ_BASE + 33)
-#define PNX833X_PIC_CLOCK_INT (PNX833X_PIC_IRQ_BASE + 34)
-#define PNX833X_PIC_SGDX_PARSER_INT (PNX833X_PIC_IRQ_BASE + 35)
-#define PNX833X_PIC_VMSP_DMA_INT (PNX833X_PIC_IRQ_BASE + 36)
-
-#if defined(CONFIG_SOC_PNX8335)
-#define PNX8335_PIC_MIU_INT (PNX833X_PIC_IRQ_BASE + 37)
-#define PNX8335_PIC_AVCHIP_IRQ_INT (PNX833X_PIC_IRQ_BASE + 38)
-#define PNX8335_PIC_SYNC_HD_INT (PNX833X_PIC_IRQ_BASE + 39)
-#define PNX8335_PIC_DISP_HD_INT (PNX833X_PIC_IRQ_BASE + 40)
-#define PNX8335_PIC_DISP_SCALER_INT (PNX833X_PIC_IRQ_BASE + 41)
-#define PNX8335_PIC_OSD_HD1_INT (PNX833X_PIC_IRQ_BASE + 42)
-#define PNX8335_PIC_DTL_WRITER_Y_INT (PNX833X_PIC_IRQ_BASE + 43)
-#define PNX8335_PIC_DTL_WRITER_C_INT (PNX833X_PIC_IRQ_BASE + 44)
-#define PNX8335_PIC_DTL_EMULATOR_Y_IR_INT (PNX833X_PIC_IRQ_BASE + 45)
-#define PNX8335_PIC_DTL_EMULATOR_C_IR_INT (PNX833X_PIC_IRQ_BASE + 46)
-#define PNX8335_PIC_DENC_TTX_INT (PNX833X_PIC_IRQ_BASE + 47)
-#define PNX8335_PIC_MMI_SIF0_INT (PNX833X_PIC_IRQ_BASE + 48)
-#define PNX8335_PIC_MMI_SIF1_INT (PNX833X_PIC_IRQ_BASE + 49)
-#define PNX8335_PIC_MMI_CDMMU_INT (PNX833X_PIC_IRQ_BASE + 50)
-#define PNX8335_PIC_PIBCS_INT (PNX833X_PIC_IRQ_BASE + 51)
-#define PNX8335_PIC_ETHERNET_INT (PNX833X_PIC_IRQ_BASE + 52)
-#define PNX8335_PIC_VMSP1_0_INT (PNX833X_PIC_IRQ_BASE + 53)
-#define PNX8335_PIC_VMSP1_1_INT (PNX833X_PIC_IRQ_BASE + 54)
-#define PNX8335_PIC_VMSP1_DMA_INT (PNX833X_PIC_IRQ_BASE + 55)
-#define PNX8335_PIC_TDGR_DE_INT (PNX833X_PIC_IRQ_BASE + 56)
-#define PNX8335_PIC_IR1_IRQ_INT (PNX833X_PIC_IRQ_BASE + 57)
-#endif
-
-/* GPIO interrupts */
-#define PNX833X_GPIO_0_INT (PNX833X_GPIO_IRQ_BASE + 0)
-#define PNX833X_GPIO_1_INT (PNX833X_GPIO_IRQ_BASE + 1)
-#define PNX833X_GPIO_2_INT (PNX833X_GPIO_IRQ_BASE + 2)
-#define PNX833X_GPIO_3_INT (PNX833X_GPIO_IRQ_BASE + 3)
-#define PNX833X_GPIO_4_INT (PNX833X_GPIO_IRQ_BASE + 4)
-#define PNX833X_GPIO_5_INT (PNX833X_GPIO_IRQ_BASE + 5)
-#define PNX833X_GPIO_6_INT (PNX833X_GPIO_IRQ_BASE + 6)
-#define PNX833X_GPIO_7_INT (PNX833X_GPIO_IRQ_BASE + 7)
-#define PNX833X_GPIO_8_INT (PNX833X_GPIO_IRQ_BASE + 8)
-#define PNX833X_GPIO_9_INT (PNX833X_GPIO_IRQ_BASE + 9)
-#define PNX833X_GPIO_10_INT (PNX833X_GPIO_IRQ_BASE + 10)
-#define PNX833X_GPIO_11_INT (PNX833X_GPIO_IRQ_BASE + 11)
-#define PNX833X_GPIO_12_INT (PNX833X_GPIO_IRQ_BASE + 12)
-#define PNX833X_GPIO_13_INT (PNX833X_GPIO_IRQ_BASE + 13)
-#define PNX833X_GPIO_14_INT (PNX833X_GPIO_IRQ_BASE + 14)
-#define PNX833X_GPIO_15_INT (PNX833X_GPIO_IRQ_BASE + 15)
-
-#endif
diff --git a/arch/mips/include/asm/mach-pnx833x/irq.h b/arch/mips/include/asm/mach-pnx833x/irq.h
deleted file mode 100644
index b7a6dab5b9f7..000000000000
--- a/arch/mips/include/asm/mach-pnx833x/irq.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * irq.h: IRQ mappings for PNX833X.
- *
- * Copyright 2008 NXP Semiconductors
- * Chris Steel <chris.steel@nxp.com>
- * Daniel Laird <daniel.j.laird@nxp.com>
- */
-
-#ifndef __ASM_MIPS_MACH_PNX833X_IRQ_H
-#define __ASM_MIPS_MACH_PNX833X_IRQ_H
-/*
- * The "IRQ numbers" are completely virtual.
- *
- * In PNX8330/1, we have 48 interrupt lines, numbered from 1 to 48.
- * Let's use numbers 1..48 for PIC interrupts, number 0 for timer interrupt,
- * numbers 49..64 for (virtual) GPIO interrupts.
- *
- * In PNX8335, we have 57 interrupt lines, numbered from 1 to 57,
- * connected to PIC, which uses core hardware interrupt 2, and also
- * a timer interrupt through hardware interrupt 5.
- * Let's use numbers 1..64 for PIC interrupts, number 0 for timer interrupt,
- * numbers 65..80 for (virtual) GPIO interrupts.
- *
- */
-#if defined(CONFIG_SOC_PNX8335)
- #define PNX833X_PIC_NUM_IRQ 58
-#else
- #define PNX833X_PIC_NUM_IRQ 37
-#endif
-
-#define MIPS_CPU_NUM_IRQ 8
-#define PNX833X_GPIO_NUM_IRQ 16
-
-#define MIPS_CPU_IRQ_BASE 0
-#define PNX833X_PIC_IRQ_BASE (MIPS_CPU_IRQ_BASE + MIPS_CPU_NUM_IRQ)
-#define PNX833X_GPIO_IRQ_BASE (PNX833X_PIC_IRQ_BASE + PNX833X_PIC_NUM_IRQ)
-#define NR_IRQS (MIPS_CPU_NUM_IRQ + PNX833X_PIC_NUM_IRQ + PNX833X_GPIO_NUM_IRQ)
-
-#endif
diff --git a/arch/mips/include/asm/mach-pnx833x/pnx833x.h b/arch/mips/include/asm/mach-pnx833x/pnx833x.h
deleted file mode 100644
index 00bb67a36386..000000000000
--- a/arch/mips/include/asm/mach-pnx833x/pnx833x.h
+++ /dev/null
@@ -1,189 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * pnx833x.h: Register mappings for PNX833X.
- *
- * Copyright 2008 NXP Semiconductors
- * Chris Steel <chris.steel@nxp.com>
- * Daniel Laird <daniel.j.laird@nxp.com>
- */
-#ifndef __ASM_MIPS_MACH_PNX833X_PNX833X_H
-#define __ASM_MIPS_MACH_PNX833X_PNX833X_H
-
-/* All regs are accessed in KSEG1 */
-#define PNX833X_BASE (0xa0000000ul + 0x17E00000ul)
-
-#define PNX833X_REG(offs) (*((volatile unsigned long *)(PNX833X_BASE + offs)))
-
-/* Registers are named exactly as in PNX833X docs, just with PNX833X_ prefix */
-
-/* Read access to multibit fields */
-#define PNX833X_BIT(val, reg, field) ((val) & PNX833X_##reg##_##field)
-#define PNX833X_REGBIT(reg, field) PNX833X_BIT(PNX833X_##reg, reg, field)
-
-/* Use PNX833X_FIELD to extract a field from val */
-#define PNX_FIELD(cpu, val, reg, field) \
- (((val) & PNX##cpu##_##reg##_##field##_MASK) >> \
- PNX##cpu##_##reg##_##field##_SHIFT)
-#define PNX833X_FIELD(val, reg, field) PNX_FIELD(833X, val, reg, field)
-#define PNX8330_FIELD(val, reg, field) PNX_FIELD(8330, val, reg, field)
-#define PNX8335_FIELD(val, reg, field) PNX_FIELD(8335, val, reg, field)
-
-/* Use PNX833X_REGFIELD to extract a field from a register */
-#define PNX833X_REGFIELD(reg, field) PNX833X_FIELD(PNX833X_##reg, reg, field)
-#define PNX8330_REGFIELD(reg, field) PNX8330_FIELD(PNX8330_##reg, reg, field)
-#define PNX8335_REGFIELD(reg, field) PNX8335_FIELD(PNX8335_##reg, reg, field)
-
-
-#define PNX_WRITEFIELD(cpu, val, reg, field) \
- (PNX##cpu##_##reg = (PNX##cpu##_##reg & ~(PNX##cpu##_##reg##_##field##_MASK)) | \
- ((val) << PNX##cpu##_##reg##_##field##_SHIFT))
-#define PNX833X_WRITEFIELD(val, reg, field) \
- PNX_WRITEFIELD(833X, val, reg, field)
-#define PNX8330_WRITEFIELD(val, reg, field) \
- PNX_WRITEFIELD(8330, val, reg, field)
-#define PNX8335_WRITEFIELD(val, reg, field) \
- PNX_WRITEFIELD(8335, val, reg, field)
-
-
-/* Macros to detect CPU type */
-
-#define PNX833X_CONFIG_MODULE_ID PNX833X_REG(0x7FFC)
-#define PNX833X_CONFIG_MODULE_ID_MAJREV_MASK 0x0000f000
-#define PNX833X_CONFIG_MODULE_ID_MAJREV_SHIFT 12
-#define PNX8330_CONFIG_MODULE_MAJREV 4
-#define PNX8335_CONFIG_MODULE_MAJREV 5
-#define CPU_IS_PNX8330 (PNX833X_REGFIELD(CONFIG_MODULE_ID, MAJREV) == \
- PNX8330_CONFIG_MODULE_MAJREV)
-#define CPU_IS_PNX8335 (PNX833X_REGFIELD(CONFIG_MODULE_ID, MAJREV) == \
- PNX8335_CONFIG_MODULE_MAJREV)
-
-
-
-#define PNX833X_RESET_CONTROL PNX833X_REG(0x8004)
-#define PNX833X_RESET_CONTROL_2 PNX833X_REG(0x8014)
-
-#define PNX833X_PIC_REG(offs) PNX833X_REG(0x01000 + (offs))
-#define PNX833X_PIC_INT_PRIORITY PNX833X_PIC_REG(0x0)
-#define PNX833X_PIC_INT_SRC PNX833X_PIC_REG(0x4)
-#define PNX833X_PIC_INT_SRC_INT_SRC_MASK 0x00000FF8ul /* bits 11:3 */
-#define PNX833X_PIC_INT_SRC_INT_SRC_SHIFT 3
-#define PNX833X_PIC_INT_REG(irq) PNX833X_PIC_REG(0x10 + 4*(irq))
-
-#define PNX833X_CLOCK_CPUCP_CTL PNX833X_REG(0x9228)
-#define PNX833X_CLOCK_CPUCP_CTL_EXIT_RESET 0x00000002ul /* bit 1 */
-#define PNX833X_CLOCK_CPUCP_CTL_DIV_CLOCK_MASK 0x00000018ul /* bits 4:3 */
-#define PNX833X_CLOCK_CPUCP_CTL_DIV_CLOCK_SHIFT 3
-
-#define PNX8335_CLOCK_PLL_CPU_CTL PNX833X_REG(0x9020)
-#define PNX8335_CLOCK_PLL_CPU_CTL_FREQ_MASK 0x1f
-#define PNX8335_CLOCK_PLL_CPU_CTL_FREQ_SHIFT 0
-
-#define PNX833X_CONFIG_MUX PNX833X_REG(0x7004)
-#define PNX833X_CONFIG_MUX_IDE_MUX 0x00000080 /* bit 7 */
-
-#define PNX8330_CONFIG_POLYFUSE_7 PNX833X_REG(0x7040)
-#define PNX8330_CONFIG_POLYFUSE_7_BOOT_MODE_MASK 0x00180000
-#define PNX8330_CONFIG_POLYFUSE_7_BOOT_MODE_SHIFT 19
-
-#define PNX833X_PIO_IN PNX833X_REG(0xF000)
-#define PNX833X_PIO_OUT PNX833X_REG(0xF004)
-#define PNX833X_PIO_DIR PNX833X_REG(0xF008)
-#define PNX833X_PIO_SEL PNX833X_REG(0xF014)
-#define PNX833X_PIO_INT_EDGE PNX833X_REG(0xF020)
-#define PNX833X_PIO_INT_HI PNX833X_REG(0xF024)
-#define PNX833X_PIO_INT_LO PNX833X_REG(0xF028)
-#define PNX833X_PIO_INT_STATUS PNX833X_REG(0xFFE0)
-#define PNX833X_PIO_INT_ENABLE PNX833X_REG(0xFFE4)
-#define PNX833X_PIO_INT_CLEAR PNX833X_REG(0xFFE8)
-#define PNX833X_PIO_IN2 PNX833X_REG(0xF05C)
-#define PNX833X_PIO_OUT2 PNX833X_REG(0xF060)
-#define PNX833X_PIO_DIR2 PNX833X_REG(0xF064)
-#define PNX833X_PIO_SEL2 PNX833X_REG(0xF068)
-
-#define PNX833X_UART0_PORTS_START (PNX833X_BASE + 0xB000)
-#define PNX833X_UART0_PORTS_END (PNX833X_BASE + 0xBFFF)
-#define PNX833X_UART1_PORTS_START (PNX833X_BASE + 0xC000)
-#define PNX833X_UART1_PORTS_END (PNX833X_BASE + 0xCFFF)
-
-#define PNX833X_USB_PORTS_START (PNX833X_BASE + 0x19000)
-#define PNX833X_USB_PORTS_END (PNX833X_BASE + 0x19FFF)
-
-#define PNX833X_CONFIG_USB PNX833X_REG(0x7008)
-
-#define PNX833X_I2C0_PORTS_START (PNX833X_BASE + 0xD000)
-#define PNX833X_I2C0_PORTS_END (PNX833X_BASE + 0xDFFF)
-#define PNX833X_I2C1_PORTS_START (PNX833X_BASE + 0xE000)
-#define PNX833X_I2C1_PORTS_END (PNX833X_BASE + 0xEFFF)
-
-#define PNX833X_IDE_PORTS_START (PNX833X_BASE + 0x1A000)
-#define PNX833X_IDE_PORTS_END (PNX833X_BASE + 0x1AFFF)
-#define PNX833X_IDE_MODULE_ID PNX833X_REG(0x1AFFC)
-
-#define PNX833X_IDE_MODULE_ID_MODULE_ID_MASK 0xFFFF0000
-#define PNX833X_IDE_MODULE_ID_MODULE_ID_SHIFT 16
-#define PNX833X_IDE_MODULE_ID_VALUE 0xA009
-
-
-#define PNX833X_MIU_SEL0 PNX833X_REG(0x2004)
-#define PNX833X_MIU_SEL0_TIMING PNX833X_REG(0x2008)
-#define PNX833X_MIU_SEL1 PNX833X_REG(0x200C)
-#define PNX833X_MIU_SEL1_TIMING PNX833X_REG(0x2010)
-#define PNX833X_MIU_SEL2 PNX833X_REG(0x2014)
-#define PNX833X_MIU_SEL2_TIMING PNX833X_REG(0x2018)
-#define PNX833X_MIU_SEL3 PNX833X_REG(0x201C)
-#define PNX833X_MIU_SEL3_TIMING PNX833X_REG(0x2020)
-
-#define PNX833X_MIU_SEL0_SPI_MODE_ENABLE_MASK (1 << 14)
-#define PNX833X_MIU_SEL0_SPI_MODE_ENABLE_SHIFT 14
-
-#define PNX833X_MIU_SEL0_BURST_MODE_ENABLE_MASK (1 << 7)
-#define PNX833X_MIU_SEL0_BURST_MODE_ENABLE_SHIFT 7
-
-#define PNX833X_MIU_SEL0_BURST_PAGE_LEN_MASK (0xF << 9)
-#define PNX833X_MIU_SEL0_BURST_PAGE_LEN_SHIFT 9
-
-#define PNX833X_MIU_CONFIG_SPI PNX833X_REG(0x2000)
-
-#define PNX833X_MIU_CONFIG_SPI_OPCODE_MASK (0xFF << 3)
-#define PNX833X_MIU_CONFIG_SPI_OPCODE_SHIFT 3
-
-#define PNX833X_MIU_CONFIG_SPI_DATA_ENABLE_MASK (1 << 2)
-#define PNX833X_MIU_CONFIG_SPI_DATA_ENABLE_SHIFT 2
-
-#define PNX833X_MIU_CONFIG_SPI_ADDR_ENABLE_MASK (1 << 1)
-#define PNX833X_MIU_CONFIG_SPI_ADDR_ENABLE_SHIFT 1
-
-#define PNX833X_MIU_CONFIG_SPI_SYNC_MASK (1 << 0)
-#define PNX833X_MIU_CONFIG_SPI_SYNC_SHIFT 0
-
-#define PNX833X_WRITE_CONFIG_SPI(opcode, data_enable, addr_enable, sync) \
- (PNX833X_MIU_CONFIG_SPI = \
- ((opcode) << PNX833X_MIU_CONFIG_SPI_OPCODE_SHIFT) | \
- ((data_enable) << PNX833X_MIU_CONFIG_SPI_DATA_ENABLE_SHIFT) | \
- ((addr_enable) << PNX833X_MIU_CONFIG_SPI_ADDR_ENABLE_SHIFT) | \
- ((sync) << PNX833X_MIU_CONFIG_SPI_SYNC_SHIFT))
-
-#define PNX8335_IP3902_PORTS_START (PNX833X_BASE + 0x2F000)
-#define PNX8335_IP3902_PORTS_END (PNX833X_BASE + 0x2FFFF)
-#define PNX8335_IP3902_MODULE_ID PNX833X_REG(0x2FFFC)
-
-#define PNX8335_IP3902_MODULE_ID_MODULE_ID_MASK 0xFFFF0000
-#define PNX8335_IP3902_MODULE_ID_MODULE_ID_SHIFT 16
-#define PNX8335_IP3902_MODULE_ID_VALUE 0x3902
-
- /* I/O location(gets remapped)*/
-#define PNX8335_NAND_BASE 0x18000000
-/* I/O location with CLE high */
-#define PNX8335_NAND_CLE_MASK 0x00100000
-/* I/O location with ALE high */
-#define PNX8335_NAND_ALE_MASK 0x00010000
-
-#define PNX8335_SATA_PORTS_START (PNX833X_BASE + 0x2E000)
-#define PNX8335_SATA_PORTS_END (PNX833X_BASE + 0x2EFFF)
-#define PNX8335_SATA_MODULE_ID PNX833X_REG(0x2EFFC)
-
-#define PNX8335_SATA_MODULE_ID_MODULE_ID_MASK 0xFFFF0000
-#define PNX8335_SATA_MODULE_ID_MODULE_ID_SHIFT 16
-#define PNX8335_SATA_MODULE_ID_VALUE 0xA099
-
-#endif
diff --git a/arch/mips/include/asm/mach-rc32434/war.h b/arch/mips/include/asm/mach-rc32434/war.h
deleted file mode 100644
index af430d26f713..000000000000
--- a/arch/mips/include/asm/mach-rc32434/war.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_MIPS_WAR_H
-#define __ASM_MIPS_MACH_MIPS_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR 0
-#define R4600_V1_HIT_CACHEOP_WAR 0
-#define R4600_V2_HIT_CACHEOP_WAR 0
-#define BCM1250_M3_WAR 0
-#define SIBYTE_1956_WAR 0
-#define MIPS4K_ICACHE_REFILL_WAR 1
-#define MIPS_CACHE_SYNC_WAR 0
-#define TX49XX_ICACHE_INDEX_INV_WAR 0
-#define ICACHE_REFILLS_WORKAROUND_WAR 0
-#define R10000_LLSC_WAR 0
-#define MIPS34K_MISSED_ITLB_WAR 0
-
-#endif /* __ASM_MIPS_MACH_MIPS_WAR_H */
diff --git a/arch/mips/include/asm/mach-rm/war.h b/arch/mips/include/asm/mach-rm/war.h
deleted file mode 100644
index eca16d167c2f..000000000000
--- a/arch/mips/include/asm/mach-rm/war.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_RM_WAR_H
-#define __ASM_MIPS_MACH_RM_WAR_H
-
-/*
- * The RM200C seems to have been shipped only with V2.0 R4600s
- */
-
-#define R4600_V1_INDEX_ICACHEOP_WAR 0
-#define R4600_V1_HIT_CACHEOP_WAR 0
-#define R4600_V2_HIT_CACHEOP_WAR 1
-#define BCM1250_M3_WAR 0
-#define SIBYTE_1956_WAR 0
-#define MIPS4K_ICACHE_REFILL_WAR 0
-#define MIPS_CACHE_SYNC_WAR 0
-#define TX49XX_ICACHE_INDEX_INV_WAR 0
-#define ICACHE_REFILLS_WORKAROUND_WAR 0
-#define R10000_LLSC_WAR 0
-#define MIPS34K_MISSED_ITLB_WAR 0
-
-#endif /* __ASM_MIPS_MACH_RM_WAR_H */
diff --git a/arch/mips/include/asm/mach-sibyte/war.h b/arch/mips/include/asm/mach-sibyte/war.h
deleted file mode 100644
index 4755b6116807..000000000000
--- a/arch/mips/include/asm/mach-sibyte/war.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_SIBYTE_WAR_H
-#define __ASM_MIPS_MACH_SIBYTE_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR 0
-#define R4600_V1_HIT_CACHEOP_WAR 0
-#define R4600_V2_HIT_CACHEOP_WAR 0
-
-#if defined(CONFIG_SB1_PASS_2_WORKAROUNDS)
-
-#ifndef __ASSEMBLY__
-extern int sb1250_m3_workaround_needed(void);
-#endif
-
-#define BCM1250_M3_WAR sb1250_m3_workaround_needed()
-#define SIBYTE_1956_WAR 1
-
-#else
-
-#define BCM1250_M3_WAR 0
-#define SIBYTE_1956_WAR 0
-
-#endif
-
-#define MIPS4K_ICACHE_REFILL_WAR 0
-#define MIPS_CACHE_SYNC_WAR 0
-#define TX49XX_ICACHE_INDEX_INV_WAR 0
-#define ICACHE_REFILLS_WORKAROUND_WAR 0
-#define R10000_LLSC_WAR 0
-#define MIPS34K_MISSED_ITLB_WAR 0
-
-#endif /* __ASM_MIPS_MACH_SIBYTE_WAR_H */
diff --git a/arch/mips/include/asm/mach-tx49xx/war.h b/arch/mips/include/asm/mach-tx49xx/war.h
deleted file mode 100644
index 445abb4eb769..000000000000
--- a/arch/mips/include/asm/mach-tx49xx/war.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_TX49XX_WAR_H
-#define __ASM_MIPS_MACH_TX49XX_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR 0
-#define R4600_V1_HIT_CACHEOP_WAR 0
-#define R4600_V2_HIT_CACHEOP_WAR 0
-#define BCM1250_M3_WAR 0
-#define SIBYTE_1956_WAR 0
-#define MIPS4K_ICACHE_REFILL_WAR 0
-#define MIPS_CACHE_SYNC_WAR 0
-#define TX49XX_ICACHE_INDEX_INV_WAR 1
-#define ICACHE_REFILLS_WORKAROUND_WAR 0
-#define R10000_LLSC_WAR 0
-#define MIPS34K_MISSED_ITLB_WAR 0
-
-#endif /* __ASM_MIPS_MACH_TX49XX_WAR_H */
diff --git a/arch/mips/include/asm/mips-boards/malta.h b/arch/mips/include/asm/mips-boards/malta.h
index 65de4fb06096..254be3d62519 100644
--- a/arch/mips/include/asm/mips-boards/malta.h
+++ b/arch/mips/include/asm/mips-boards/malta.h
@@ -92,4 +92,6 @@ static inline unsigned long get_msc_port_base(unsigned long reg)
#define MALTA_JMPRS_REG 0x1f000210
+extern void __init *malta_dt_shim(void *fdt);
+
#endif /* __ASM_MIPS_BOARDS_MALTA_H */
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index 4ddc12e4444a..a0e8ae5497b6 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -389,6 +389,13 @@
#define ST0_CU3 0x80000000
#define ST0_XX 0x80000000 /* MIPS IV naming */
+/* in-kernel enabled CUs */
+#ifdef CONFIG_CPU_LOONGSON64
+#define ST0_KERNEL_CUMASK (ST0_CU0 | ST0_CU2)
+#else
+#define ST0_KERNEL_CUMASK ST0_CU0
+#endif
+
/*
* Bitfields and bit numbers in the coprocessor 0 IntCtl register. (MIPSR2)
*/
@@ -1706,12 +1713,6 @@ do { \
#define read_c0_count() __read_32bit_c0_register($9, 0)
#define write_c0_count(val) __write_32bit_c0_register($9, 0, val)
-#define read_c0_count2() __read_32bit_c0_register($9, 6) /* pnx8550 */
-#define write_c0_count2(val) __write_32bit_c0_register($9, 6, val)
-
-#define read_c0_count3() __read_32bit_c0_register($9, 7) /* pnx8550 */
-#define write_c0_count3(val) __write_32bit_c0_register($9, 7, val)
-
#define read_c0_entryhi() __read_ulong_c0_register($10, 0)
#define write_c0_entryhi(val) __write_ulong_c0_register($10, 0, val)
@@ -1730,12 +1731,6 @@ do { \
#define read_c0_guestctl0ext() __read_32bit_c0_register($11, 4)
#define write_c0_guestctl0ext(val) __write_32bit_c0_register($11, 4, val)
-#define read_c0_compare2() __read_32bit_c0_register($11, 6) /* pnx8550 */
-#define write_c0_compare2(val) __write_32bit_c0_register($11, 6, val)
-
-#define read_c0_compare3() __read_32bit_c0_register($11, 7) /* pnx8550 */
-#define write_c0_compare3(val) __write_32bit_c0_register($11, 7, val)
-
#define read_c0_status() __read_32bit_c0_register($12, 0)
#define write_c0_status(val) __write_32bit_c0_register($12, 0, val)
@@ -2728,7 +2723,7 @@ static inline void tlb_probe(void)
static inline void tlb_read(void)
{
-#if MIPS34K_MISSED_ITLB_WAR
+#ifdef CONFIG_WAR_MIPS34K_MISSED_ITLB
int res = 0;
__asm__ __volatile__(
@@ -2750,7 +2745,7 @@ static inline void tlb_read(void)
"tlbr\n\t"
".set reorder");
-#if MIPS34K_MISSED_ITLB_WAR
+#ifdef CONFIG_WAR_MIPS34K_MISSED_ITLB
if ((res & _ULCAST_(1)))
__asm__ __volatile__(
" .set push \n"
diff --git a/arch/mips/include/asm/netlogic/psb-bootinfo.h b/arch/mips/include/asm/netlogic/psb-bootinfo.h
index 6878307f0ee6..c716e9397113 100644
--- a/arch/mips/include/asm/netlogic/psb-bootinfo.h
+++ b/arch/mips/include/asm/netlogic/psb-bootinfo.h
@@ -77,21 +77,6 @@ struct psb_info {
uint64_t avail_mem_map;
};
-enum {
- NETLOGIC_IO_SPACE = 0x10,
- PCIX_IO_SPACE,
- PCIX_CFG_SPACE,
- PCIX_MEMORY_SPACE,
- HT_IO_SPACE,
- HT_CFG_SPACE,
- HT_MEMORY_SPACE,
- SRAM_SPACE,
- FLASH_CONTROLLER_SPACE
-};
-
-#define NLM_MAX_ARGS 64
-#define NLM_MAX_ENVS 32
-
/* This is what netlboot passes and linux boot_mem_map is subtly different */
#define NLM_BOOT_MEM_MAP_MAX 32
struct nlm_boot_mem_map {
@@ -102,6 +87,7 @@ struct nlm_boot_mem_map {
uint32_t type; /* type of memory segment */
} map[NLM_BOOT_MEM_MAP_MAX];
};
+#define NLM_BOOT_MEM_RAM 1
/* Pointer to saved boot loader info */
extern struct psb_info nlm_prom_info;
diff --git a/arch/mips/include/asm/octeon/cvmx-bootinfo.h b/arch/mips/include/asm/octeon/cvmx-bootinfo.h
index 62787765575e..c114a7ba0bad 100644
--- a/arch/mips/include/asm/octeon/cvmx-bootinfo.h
+++ b/arch/mips/include/asm/octeon/cvmx-bootinfo.h
@@ -295,6 +295,8 @@ enum cvmx_board_types_enum {
*/
CVMX_BOARD_TYPE_CUST_PRIVATE_MIN = 20001,
CVMX_BOARD_TYPE_UBNT_E100 = 20002,
+ CVMX_BOARD_TYPE_UBNT_E200 = 20003,
+ CVMX_BOARD_TYPE_UBNT_E220 = 20005,
CVMX_BOARD_TYPE_CUST_DSR1000N = 20006,
CVMX_BOARD_TYPE_KONTRON_S1901 = 21901,
CVMX_BOARD_TYPE_CUST_PRIVATE_MAX = 30000,
@@ -396,6 +398,8 @@ static inline const char *cvmx_board_type_to_string(enum
/* Customer private range */
ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_PRIVATE_MIN)
ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_UBNT_E100)
+ ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_UBNT_E200)
+ ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_UBNT_E220)
ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_DSR1000N)
ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_KONTRON_S1901)
ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_PRIVATE_MAX)
diff --git a/arch/mips/include/asm/pgtable-bits.h b/arch/mips/include/asm/pgtable-bits.h
index e26dc41a8a68..2362842ee2b5 100644
--- a/arch/mips/include/asm/pgtable-bits.h
+++ b/arch/mips/include/asm/pgtable-bits.h
@@ -249,11 +249,6 @@ static inline uint64_t pte_to_entrylo(unsigned long pte_val)
#define _CACHE_CACHABLE_NONCOHERENT (5<<_CACHE_SHIFT)
-#elif defined(CONFIG_MACH_INGENIC)
-
-/* Ingenic uses the WA bit to achieve write-combine memory writes */
-#define _CACHE_UNCACHED_ACCELERATED (1<<_CACHE_SHIFT)
-
#endif
#ifndef _CACHE_CACHABLE_NO_WA
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index dd7a0f552cac..e5ef0fdd4838 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -37,8 +37,6 @@ struct vm_area_struct;
_PAGE_GLOBAL | _page_cachable_default)
#define PAGE_KERNEL_NC __pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \
_PAGE_GLOBAL | _CACHE_CACHABLE_NONCOHERENT)
-#define PAGE_USERIO __pgprot(_PAGE_PRESENT | _PAGE_WRITE | \
- _page_cachable_default)
#define PAGE_KERNEL_UNCACHED __pgprot(_PAGE_PRESENT | __READABLE | \
__WRITEABLE | _PAGE_GLOBAL | _CACHE_UNCACHED)
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 856e12f6063d..7834e7c0c78a 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -29,6 +29,7 @@
*/
extern unsigned int vced_count, vcei_count;
+extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
#ifdef CONFIG_32BIT
#ifdef CONFIG_KVM_GUEST
diff --git a/arch/mips/include/asm/r4k-timer.h b/arch/mips/include/asm/r4k-timer.h
index afe9e0e03fe9..6e7361629348 100644
--- a/arch/mips/include/asm/r4k-timer.h
+++ b/arch/mips/include/asm/r4k-timer.h
@@ -5,8 +5,8 @@
*
* Copyright (C) 2008 by Ralf Baechle (ralf@linux-mips.org)
*/
-#ifndef __ASM_R4K_TYPES_H
-#define __ASM_R4K_TYPES_H
+#ifndef __ASM_R4K_TIMER_H
+#define __ASM_R4K_TIMER_H
#include <linux/compiler.h>
@@ -27,4 +27,4 @@ static inline void synchronise_count_slave(int cpu)
#endif
-#endif /* __ASM_R4K_TYPES_H */
+#endif /* __ASM_R4K_TIMER_H */
diff --git a/arch/mips/include/asm/sgi/heart.h b/arch/mips/include/asm/sgi/heart.h
index c423221b4792..0d03751955c4 100644
--- a/arch/mips/include/asm/sgi/heart.h
+++ b/arch/mips/include/asm/sgi/heart.h
@@ -264,6 +264,57 @@ struct ip30_heart_regs { /* 0x0ff00000 */
#define HC_NCOR_MEM_ERR BIT(1)
#define HC_COR_MEM_ERR BIT(0)
+/*
+ * HEART has 64 interrupt vectors available to it, subdivided into five
+ * priority levels. They are numbered 0 to 63.
+ */
+#define HEART_NUM_IRQS 64
+
+/*
+ * These are the five interrupt priority levels and their corresponding
+ * CPU IPx interrupt pins.
+ *
+ * Level 4 - Error Interrupts.
+ * Level 3 - HEART timer interrupt.
+ * Level 2 - CPU IPI, CPU debug, power putton, general device interrupts.
+ * Level 1 - General device interrupts.
+ * Level 0 - General device GFX flow control interrupts.
+ */
+#define HEART_L4_INT_MASK 0xfff8000000000000ULL /* IP6 */
+#define HEART_L3_INT_MASK 0x0004000000000000ULL /* IP5 */
+#define HEART_L2_INT_MASK 0x0003ffff00000000ULL /* IP4 */
+#define HEART_L1_INT_MASK 0x00000000ffff0000ULL /* IP3 */
+#define HEART_L0_INT_MASK 0x000000000000ffffULL /* IP2 */
+
+/* HEART L0 Interrupts (Low Priority) */
+#define HEART_L0_INT_GENERIC 0
+#define HEART_L0_INT_FLOW_CTRL_HWTR_0 1
+#define HEART_L0_INT_FLOW_CTRL_HWTR_1 2
+
+/* HEART L2 Interrupts (High Priority) */
+#define HEART_L2_INT_RESCHED_CPU_0 46
+#define HEART_L2_INT_RESCHED_CPU_1 47
+#define HEART_L2_INT_CALL_CPU_0 48
+#define HEART_L2_INT_CALL_CPU_1 49
+
+/* HEART L3 Interrupts (Compare/Counter Timer) */
+#define HEART_L3_INT_TIMER 50
+
+/* HEART L4 Interrupts (Errors) */
+#define HEART_L4_INT_XWID_ERR_9 51
+#define HEART_L4_INT_XWID_ERR_A 52
+#define HEART_L4_INT_XWID_ERR_B 53
+#define HEART_L4_INT_XWID_ERR_C 54
+#define HEART_L4_INT_XWID_ERR_D 55
+#define HEART_L4_INT_XWID_ERR_E 56
+#define HEART_L4_INT_XWID_ERR_F 57
+#define HEART_L4_INT_XWID_ERR_XBOW 58
+#define HEART_L4_INT_CPU_BUS_ERR_0 59
+#define HEART_L4_INT_CPU_BUS_ERR_1 60
+#define HEART_L4_INT_CPU_BUS_ERR_2 61
+#define HEART_L4_INT_CPU_BUS_ERR_3 62
+#define HEART_L4_INT_HEART_EXCP 63
+
extern struct ip30_heart_regs __iomem *heart_regs;
#define heart_read ____raw_readq
diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h
index 3e8d2aaf96af..aa430a6c68b2 100644
--- a/arch/mips/include/asm/stackframe.h
+++ b/arch/mips/include/asm/stackframe.h
@@ -450,7 +450,7 @@
*/
.macro CLI
mfc0 t0, CP0_STATUS
- li t1, ST0_CU0 | STATMASK
+ li t1, ST0_KERNEL_CUMASK | STATMASK
or t0, t1
xori t0, STATMASK
mtc0 t0, CP0_STATUS
@@ -463,7 +463,7 @@
*/
.macro STI
mfc0 t0, CP0_STATUS
- li t1, ST0_CU0 | STATMASK
+ li t1, ST0_KERNEL_CUMASK | STATMASK
or t0, t1
xori t0, STATMASK & ~1
mtc0 t0, CP0_STATUS
@@ -477,7 +477,7 @@
*/
.macro KMODE
mfc0 t0, CP0_STATUS
- li t1, ST0_CU0 | (STATMASK & ~1)
+ li t1, ST0_KERNEL_CUMASK | (STATMASK & ~1)
#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
andi t2, t0, ST0_IEP
srl t2, 2
diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
index 0b0a93bf83cd..a4374b4cb88f 100644
--- a/arch/mips/include/asm/switch_to.h
+++ b/arch/mips/include/asm/switch_to.h
@@ -117,6 +117,8 @@ do { \
__restore_dsp(next); \
} \
if (cop2_present) { \
+ u32 status = read_c0_status(); \
+ \
set_c0_status(ST0_CU2); \
if ((KSTK_STATUS(prev) & ST0_CU2)) { \
if (cop2_lazy_restore) \
@@ -127,7 +129,7 @@ do { \
!cop2_lazy_restore) { \
cop2_restore(next); \
} \
- clear_c0_status(ST0_CU2); \
+ write_c0_status(status); \
} \
__clear_r5_hw_ll_bit(); \
__clear_software_ll_bit(); \
diff --git a/arch/mips/include/asm/txx9/tx4939.h b/arch/mips/include/asm/txx9/tx4939.h
index 00805ac6e9fc..abf980af9ef4 100644
--- a/arch/mips/include/asm/txx9/tx4939.h
+++ b/arch/mips/include/asm/txx9/tx4939.h
@@ -498,7 +498,6 @@ struct tx4939_vpc_desc {
((((mst) + 245/2) / 245UL * 429 * 16 + 19) / 19 / 2)
void tx4939_wdt_init(void);
-void tx4939_add_memory_regions(void);
void tx4939_setup(void);
void tx4939_time_init(unsigned int tmrnr);
void tx4939_sio_init(unsigned int sclk, unsigned int cts_mask);
diff --git a/arch/mips/include/asm/war.h b/arch/mips/include/asm/war.h
index e43f800e662d..21443f096238 100644
--- a/arch/mips/include/asm/war.h
+++ b/arch/mips/include/asm/war.h
@@ -9,8 +9,6 @@
#ifndef _ASM_WAR_H
#define _ASM_WAR_H
-#include <war.h>
-
/*
* Work around certain R4000 CPU errata (as implemented by GCC):
*
@@ -72,152 +70,4 @@
#define DADDI_WAR 0
#endif
-/*
- * Another R4600 erratum. Due to the lack of errata information the exact
- * technical details aren't known. I've experimentally found that disabling
- * interrupts during indexed I-cache flushes seems to be sufficient to deal
- * with the issue.
- */
-#ifndef R4600_V1_INDEX_ICACHEOP_WAR
-#error Check setting of R4600_V1_INDEX_ICACHEOP_WAR for your platform
-#endif
-
-/*
- * Pleasures of the R4600 V1.x. Cite from the IDT R4600 V1.7 errata:
- *
- * 18. The CACHE instructions Hit_Writeback_Invalidate_D, Hit_Writeback_D,
- * Hit_Invalidate_D and Create_Dirty_Excl_D should only be
- * executed if there is no other dcache activity. If the dcache is
- * accessed for another instruction immeidately preceding when these
- * cache instructions are executing, it is possible that the dcache
- * tag match outputs used by these cache instructions will be
- * incorrect. These cache instructions should be preceded by at least
- * four instructions that are not any kind of load or store
- * instruction.
- *
- * This is not allowed: lw
- * nop
- * nop
- * nop
- * cache Hit_Writeback_Invalidate_D
- *
- * This is allowed: lw
- * nop
- * nop
- * nop
- * nop
- * cache Hit_Writeback_Invalidate_D
- */
-#ifndef R4600_V1_HIT_CACHEOP_WAR
-#error Check setting of R4600_V1_HIT_CACHEOP_WAR for your platform
-#endif
-
-
-/*
- * Writeback and invalidate the primary cache dcache before DMA.
- *
- * R4600 v2.0 bug: "The CACHE instructions Hit_Writeback_Inv_D,
- * Hit_Writeback_D, Hit_Invalidate_D and Create_Dirty_Exclusive_D will only
- * operate correctly if the internal data cache refill buffer is empty. These
- * CACHE instructions should be separated from any potential data cache miss
- * by a load instruction to an uncached address to empty the response buffer."
- * (Revision 2.0 device errata from IDT available on https://www.idt.com/
- * in .pdf format.)
- */
-#ifndef R4600_V2_HIT_CACHEOP_WAR
-#error Check setting of R4600_V2_HIT_CACHEOP_WAR for your platform
-#endif
-
-/*
- * Workaround for the Sibyte M3 errata the text of which can be found at
- *
- * http://sibyte.broadcom.com/hw/bcm1250/docs/pass2errata.txt
- *
- * This will enable the use of a special TLB refill handler which does a
- * consistency check on the information in c0_badvaddr and c0_entryhi and
- * will just return and take the exception again if the information was
- * found to be inconsistent.
- */
-#ifndef BCM1250_M3_WAR
-#error Check setting of BCM1250_M3_WAR for your platform
-#endif
-
-/*
- * This is a DUART workaround related to glitches around register accesses
- */
-#ifndef SIBYTE_1956_WAR
-#error Check setting of SIBYTE_1956_WAR for your platform
-#endif
-
-/*
- * Fill buffers not flushed on CACHE instructions
- *
- * Hit_Invalidate_I cacheops invalidate an icache line but the refill
- * for that line can get stale data from the fill buffer instead of
- * accessing memory if the previous icache miss was also to that line.
- *
- * Workaround: generate an icache refill from a different line
- *
- * Affects:
- * MIPS 4K RTL revision <3.0, PRID revision <4
- */
-#ifndef MIPS4K_ICACHE_REFILL_WAR
-#error Check setting of MIPS4K_ICACHE_REFILL_WAR for your platform
-#endif
-
-/*
- * Missing implicit forced flush of evictions caused by CACHE
- * instruction
- *
- * Evictions caused by a CACHE instructions are not forced on to the
- * bus. The BIU gives higher priority to fetches than to the data from
- * the eviction buffer and no collision detection is performed between
- * fetches and pending data from the eviction buffer.
- *
- * Workaround: Execute a SYNC instruction after the cache instruction
- *
- * Affects:
- * MIPS 5Kc,5Kf RTL revision <2.3, PRID revision <8
- * MIPS 20Kc RTL revision <4.0, PRID revision <?
- */
-#ifndef MIPS_CACHE_SYNC_WAR
-#error Check setting of MIPS_CACHE_SYNC_WAR for your platform
-#endif
-
-/*
- * From TX49/H2 manual: "If the instruction (i.e. CACHE) is issued for
- * the line which this instruction itself exists, the following
- * operation is not guaranteed."
- *
- * Workaround: do two phase flushing for Index_Invalidate_I
- */
-#ifndef TX49XX_ICACHE_INDEX_INV_WAR
-#error Check setting of TX49XX_ICACHE_INDEX_INV_WAR for your platform
-#endif
-
-/*
- * The RM7000 processors and the E9000 cores have a bug (though PMC-Sierra
- * opposes it being called that) where invalid instructions in the same
- * I-cache line worth of instructions being fetched may case spurious
- * exceptions.
- */
-#ifndef ICACHE_REFILLS_WORKAROUND_WAR
-#error Check setting of ICACHE_REFILLS_WORKAROUND_WAR for your platform
-#endif
-
-/*
- * On the R10000 up to version 2.6 (not sure about 2.7) there is a bug that
- * may cause ll / sc and lld / scd sequences to execute non-atomically.
- */
-#ifndef R10000_LLSC_WAR
-#error Check setting of R10000_LLSC_WAR for your platform
-#endif
-
-/*
- * 34K core erratum: "Problems Executing the TLBR Instruction"
- */
-#ifndef MIPS34K_MISSED_ITLB_WAR
-#error Check setting of MIPS34K_MISSED_ITLB_WAR for your platform
-#endif
-
#endif /* _ASM_WAR_H */
diff --git a/arch/mips/jz4740/Kconfig b/arch/mips/ingenic/Kconfig
index c2a6fbf8e411..3238e16febd5 100644
--- a/arch/mips/jz4740/Kconfig
+++ b/arch/mips/ingenic/Kconfig
@@ -1,15 +1,21 @@
# SPDX-License-Identifier: GPL-2.0
+
+config MACH_INGENIC_GENERIC
+ bool
+ select MACH_INGENIC
+ select MACH_JZ4740
+ select MACH_JZ4770
+ select MACH_JZ4780
+ select MACH_X1000
+
choice
prompt "Machine type"
- depends on MACH_INGENIC
+ depends on MACH_INGENIC_SOC
default INGENIC_GENERIC_BOARD
config INGENIC_GENERIC_BOARD
bool "Generic board"
- select MACH_JZ4740
- select MACH_JZ4770
- select MACH_JZ4780
- select MACH_X1000
+ select MACH_INGENIC_GENERIC
config JZ4740_QI_LB60
bool "Qi Hardware Ben NanoNote"
diff --git a/arch/mips/jz4740/Makefile b/arch/mips/jz4740/Makefile
deleted file mode 100644
index f96c0f5eca44..000000000000
--- a/arch/mips/jz4740/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for the Ingenic JZ4740.
-#
-
-# Object file lists.
-obj-y += setup.o
-
-CFLAGS_setup.o = -I$(src)/../../../scripts/dtc/libfdt
diff --git a/arch/mips/jz4740/Platform b/arch/mips/jz4740/Platform
deleted file mode 100644
index bd35d0621b13..000000000000
--- a/arch/mips/jz4740/Platform
+++ /dev/null
@@ -1,3 +0,0 @@
-cflags-$(CONFIG_MACH_INGENIC) += -I$(srctree)/arch/mips/include/asm/mach-jz4740
-load-$(CONFIG_MACH_INGENIC) += 0xffffffff80010000
-zload-$(CONFIG_MACH_INGENIC) += 0xffffffff81000000
diff --git a/arch/mips/jz4740/setup.c b/arch/mips/jz4740/setup.c
deleted file mode 100644
index 51d906325ce6..000000000000
--- a/arch/mips/jz4740/setup.c
+++ /dev/null
@@ -1,145 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2009-2010, Lars-Peter Clausen <lars@metafoo.de>
- * Copyright (C) 2011, Maarten ter Huurne <maarten@treewalker.org>
- * JZ4740 setup code
- */
-
-#include <linux/clocksource.h>
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/irqchip.h>
-#include <linux/kernel.h>
-#include <linux/libfdt.h>
-#include <linux/of_clk.h>
-#include <linux/of_fdt.h>
-#include <linux/pm.h>
-#include <linux/sizes.h>
-#include <linux/suspend.h>
-
-#include <asm/bootinfo.h>
-#include <asm/fw/fw.h>
-#include <asm/prom.h>
-#include <asm/reboot.h>
-#include <asm/time.h>
-
-static unsigned long __init get_board_mach_type(const void *fdt)
-{
- if (!fdt_node_check_compatible(fdt, 0, "ingenic,x2000"))
- return MACH_INGENIC_X2000;
- if (!fdt_node_check_compatible(fdt, 0, "ingenic,x1830"))
- return MACH_INGENIC_X1830;
- if (!fdt_node_check_compatible(fdt, 0, "ingenic,x1000"))
- return MACH_INGENIC_X1000;
- if (!fdt_node_check_compatible(fdt, 0, "ingenic,jz4780"))
- return MACH_INGENIC_JZ4780;
- if (!fdt_node_check_compatible(fdt, 0, "ingenic,jz4770"))
- return MACH_INGENIC_JZ4770;
- if (!fdt_node_check_compatible(fdt, 0, "ingenic,jz4725b"))
- return MACH_INGENIC_JZ4725B;
-
- return MACH_INGENIC_JZ4740;
-}
-
-void __init plat_mem_setup(void)
-{
- void *dtb = (void *)fw_passed_dtb;
-
- __dt_setup_arch(dtb);
-
- /*
- * Old devicetree files for the qi,lb60 board did not have a /memory
- * node. Hardcode the memory info here.
- */
- if (!fdt_node_check_compatible(dtb, 0, "qi,lb60") &&
- fdt_path_offset(dtb, "/memory") < 0)
- early_init_dt_add_memory_arch(0, SZ_32M);
-
- mips_machtype = get_board_mach_type(dtb);
-}
-
-void __init device_tree_init(void)
-{
- if (!initial_boot_params)
- return;
-
- unflatten_and_copy_device_tree();
-}
-
-const char *get_system_type(void)
-{
- switch (mips_machtype) {
- case MACH_INGENIC_X2000:
- return "X2000";
- case MACH_INGENIC_X1830:
- return "X1830";
- case MACH_INGENIC_X1000:
- return "X1000";
- case MACH_INGENIC_JZ4780:
- return "JZ4780";
- case MACH_INGENIC_JZ4770:
- return "JZ4770";
- case MACH_INGENIC_JZ4725B:
- return "JZ4725B";
- default:
- return "JZ4740";
- }
-}
-
-void __init arch_init_irq(void)
-{
- irqchip_init();
-}
-
-void __init plat_time_init(void)
-{
- of_clk_init(NULL);
- timer_probe();
-}
-
-void __init prom_init(void)
-{
- fw_init_cmdline();
-}
-
-void __init prom_free_prom_memory(void)
-{
-}
-
-static void jz4740_wait_instr(void)
-{
- __asm__(".set push;\n"
- ".set mips3;\n"
- "wait;\n"
- ".set pop;\n"
- );
-}
-
-static void jz4740_halt(void)
-{
- for (;;)
- jz4740_wait_instr();
-}
-
-static int __maybe_unused jz4740_pm_enter(suspend_state_t state)
-{
- jz4740_wait_instr();
-
- return 0;
-}
-
-static const struct platform_suspend_ops jz4740_pm_ops __maybe_unused = {
- .valid = suspend_valid_only_mem,
- .enter = jz4740_pm_enter,
-};
-
-static int __init jz4740_pm_init(void)
-{
- if (IS_ENABLED(CONFIG_PM_SLEEP))
- suspend_set_ops(&jz4740_pm_ops);
- _machine_halt = jz4740_halt;
-
- return 0;
-
-}
-late_initcall(jz4740_pm_init);
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 13a26d254829..2a05b923f579 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -5,11 +5,17 @@
extra-y := head.o vmlinux.lds
-obj-y += cmpxchg.o cpu-probe.o branch.o elf.o entry.o genex.o idle.o irq.o \
+obj-y += branch.o cmpxchg.o elf.o entry.o genex.o idle.o irq.o \
process.o prom.o ptrace.o reset.o setup.o signal.o \
syscall.o time.o topology.o traps.o unaligned.o watch.o \
vdso.o cacheinfo.o
+ifdef CONFIG_CPU_R3K_TLB
+obj-y += cpu-r3k-probe.o
+else
+obj-y += cpu-probe.o
+endif
+
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_early_printk.o = -pg
@@ -42,6 +48,7 @@ sw-$(CONFIG_CPU_TX39XX) := r2300_switch.o
sw-$(CONFIG_CPU_CAVIUM_OCTEON) := octeon_switch.o
obj-y += $(sw-y)
+obj-$(CONFIG_MIPS_FP_SUPPORT) += fpu-probe.o
obj-$(CONFIG_CPU_R2300_FPU) += r2300_fpu.o
obj-$(CONFIG_CPU_R4K_FPU) += r4k_fpu.o
diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c
index fb3e203698ea..0216ff24c392 100644
--- a/arch/mips/kernel/branch.c
+++ b/arch/mips/kernel/branch.c
@@ -20,6 +20,8 @@
#include <asm/ptrace.h>
#include <linux/uaccess.h>
+#include "probes-common.h"
+
/*
* Calculate and return exception PC in case of branch delay slot
* for microMIPS and MIPS16e. It does not clear the ISA mode bit.
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index e2955f1f6316..e6853697a056 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -28,336 +28,14 @@
#include <asm/spram.h>
#include <linux/uaccess.h>
+#include "fpu-probe.h"
+
#include <asm/mach-loongson64/cpucfg-emul.h>
/* Hardware capabilities */
unsigned int elf_hwcap __read_mostly;
EXPORT_SYMBOL_GPL(elf_hwcap);
-#ifdef CONFIG_MIPS_FP_SUPPORT
-
-/*
- * Get the FPU Implementation/Revision.
- */
-static inline unsigned long cpu_get_fpu_id(void)
-{
- unsigned long tmp, fpu_id;
-
- tmp = read_c0_status();
- __enable_fpu(FPU_AS_IS);
- fpu_id = read_32bit_cp1_register(CP1_REVISION);
- write_c0_status(tmp);
- return fpu_id;
-}
-
-/*
- * Check if the CPU has an external FPU.
- */
-static inline int __cpu_has_fpu(void)
-{
- return (cpu_get_fpu_id() & FPIR_IMP_MASK) != FPIR_IMP_NONE;
-}
-
-/*
- * Determine the FCSR mask for FPU hardware.
- */
-static inline void cpu_set_fpu_fcsr_mask(struct cpuinfo_mips *c)
-{
- unsigned long sr, mask, fcsr, fcsr0, fcsr1;
-
- fcsr = c->fpu_csr31;
- mask = FPU_CSR_ALL_X | FPU_CSR_ALL_E | FPU_CSR_ALL_S | FPU_CSR_RM;
-
- sr = read_c0_status();
- __enable_fpu(FPU_AS_IS);
-
- fcsr0 = fcsr & mask;
- write_32bit_cp1_register(CP1_STATUS, fcsr0);
- fcsr0 = read_32bit_cp1_register(CP1_STATUS);
-
- fcsr1 = fcsr | ~mask;
- write_32bit_cp1_register(CP1_STATUS, fcsr1);
- fcsr1 = read_32bit_cp1_register(CP1_STATUS);
-
- write_32bit_cp1_register(CP1_STATUS, fcsr);
-
- write_c0_status(sr);
-
- c->fpu_msk31 = ~(fcsr0 ^ fcsr1) & ~mask;
-}
-
-/*
- * Determine the IEEE 754 NaN encodings and ABS.fmt/NEG.fmt execution modes
- * supported by FPU hardware.
- */
-static void cpu_set_fpu_2008(struct cpuinfo_mips *c)
-{
- if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
- MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
- MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 |
- MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) {
- unsigned long sr, fir, fcsr, fcsr0, fcsr1;
-
- sr = read_c0_status();
- __enable_fpu(FPU_AS_IS);
-
- fir = read_32bit_cp1_register(CP1_REVISION);
- if (fir & MIPS_FPIR_HAS2008) {
- fcsr = read_32bit_cp1_register(CP1_STATUS);
-
- /*
- * MAC2008 toolchain never landed in real world, so we're only
- * testing wether it can be disabled and don't try to enabled
- * it.
- */
- fcsr0 = fcsr & ~(FPU_CSR_ABS2008 | FPU_CSR_NAN2008 | FPU_CSR_MAC2008);
- write_32bit_cp1_register(CP1_STATUS, fcsr0);
- fcsr0 = read_32bit_cp1_register(CP1_STATUS);
-
- fcsr1 = fcsr | FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
- write_32bit_cp1_register(CP1_STATUS, fcsr1);
- fcsr1 = read_32bit_cp1_register(CP1_STATUS);
-
- write_32bit_cp1_register(CP1_STATUS, fcsr);
-
- if (c->isa_level & (MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2)) {
- /*
- * The bit for MAC2008 might be reused by R6 in future,
- * so we only test for R2-R5.
- */
- if (fcsr0 & FPU_CSR_MAC2008)
- c->options |= MIPS_CPU_MAC_2008_ONLY;
- }
-
- if (!(fcsr0 & FPU_CSR_NAN2008))
- c->options |= MIPS_CPU_NAN_LEGACY;
- if (fcsr1 & FPU_CSR_NAN2008)
- c->options |= MIPS_CPU_NAN_2008;
-
- if ((fcsr0 ^ fcsr1) & FPU_CSR_ABS2008)
- c->fpu_msk31 &= ~FPU_CSR_ABS2008;
- else
- c->fpu_csr31 |= fcsr & FPU_CSR_ABS2008;
-
- if ((fcsr0 ^ fcsr1) & FPU_CSR_NAN2008)
- c->fpu_msk31 &= ~FPU_CSR_NAN2008;
- else
- c->fpu_csr31 |= fcsr & FPU_CSR_NAN2008;
- } else {
- c->options |= MIPS_CPU_NAN_LEGACY;
- }
-
- write_c0_status(sr);
- } else {
- c->options |= MIPS_CPU_NAN_LEGACY;
- }
-}
-
-/*
- * IEEE 754 conformance mode to use. Affects the NaN encoding and the
- * ABS.fmt/NEG.fmt execution mode.
- */
-static enum { STRICT, LEGACY, STD2008, RELAXED } ieee754 = STRICT;
-
-/*
- * Set the IEEE 754 NaN encodings and the ABS.fmt/NEG.fmt execution modes
- * to support by the FPU emulator according to the IEEE 754 conformance
- * mode selected. Note that "relaxed" straps the emulator so that it
- * allows 2008-NaN binaries even for legacy processors.
- */
-static void cpu_set_nofpu_2008(struct cpuinfo_mips *c)
-{
- c->options &= ~(MIPS_CPU_NAN_2008 | MIPS_CPU_NAN_LEGACY);
- c->fpu_csr31 &= ~(FPU_CSR_ABS2008 | FPU_CSR_NAN2008);
- c->fpu_msk31 &= ~(FPU_CSR_ABS2008 | FPU_CSR_NAN2008);
-
- switch (ieee754) {
- case STRICT:
- if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
- MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
- MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 |
- MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) {
- c->options |= MIPS_CPU_NAN_2008 | MIPS_CPU_NAN_LEGACY;
- } else {
- c->options |= MIPS_CPU_NAN_LEGACY;
- c->fpu_msk31 |= FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
- }
- break;
- case LEGACY:
- c->options |= MIPS_CPU_NAN_LEGACY;
- c->fpu_msk31 |= FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
- break;
- case STD2008:
- c->options |= MIPS_CPU_NAN_2008;
- c->fpu_csr31 |= FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
- c->fpu_msk31 |= FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
- break;
- case RELAXED:
- c->options |= MIPS_CPU_NAN_2008 | MIPS_CPU_NAN_LEGACY;
- break;
- }
-}
-
-/*
- * Override the IEEE 754 NaN encoding and ABS.fmt/NEG.fmt execution mode
- * according to the "ieee754=" parameter.
- */
-static void cpu_set_nan_2008(struct cpuinfo_mips *c)
-{
- switch (ieee754) {
- case STRICT:
- mips_use_nan_legacy = !!cpu_has_nan_legacy;
- mips_use_nan_2008 = !!cpu_has_nan_2008;
- break;
- case LEGACY:
- mips_use_nan_legacy = !!cpu_has_nan_legacy;
- mips_use_nan_2008 = !cpu_has_nan_legacy;
- break;
- case STD2008:
- mips_use_nan_legacy = !cpu_has_nan_2008;
- mips_use_nan_2008 = !!cpu_has_nan_2008;
- break;
- case RELAXED:
- mips_use_nan_legacy = true;
- mips_use_nan_2008 = true;
- break;
- }
-}
-
-/*
- * IEEE 754 NaN encoding and ABS.fmt/NEG.fmt execution mode override
- * settings:
- *
- * strict: accept binaries that request a NaN encoding supported by the FPU
- * legacy: only accept legacy-NaN binaries
- * 2008: only accept 2008-NaN binaries
- * relaxed: accept any binaries regardless of whether supported by the FPU
- */
-static int __init ieee754_setup(char *s)
-{
- if (!s)
- return -1;
- else if (!strcmp(s, "strict"))
- ieee754 = STRICT;
- else if (!strcmp(s, "legacy"))
- ieee754 = LEGACY;
- else if (!strcmp(s, "2008"))
- ieee754 = STD2008;
- else if (!strcmp(s, "relaxed"))
- ieee754 = RELAXED;
- else
- return -1;
-
- if (!(boot_cpu_data.options & MIPS_CPU_FPU))
- cpu_set_nofpu_2008(&boot_cpu_data);
- cpu_set_nan_2008(&boot_cpu_data);
-
- return 0;
-}
-
-early_param("ieee754", ieee754_setup);
-
-/*
- * Set the FIR feature flags for the FPU emulator.
- */
-static void cpu_set_nofpu_id(struct cpuinfo_mips *c)
-{
- u32 value;
-
- value = 0;
- if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
- MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
- MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 |
- MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6))
- value |= MIPS_FPIR_D | MIPS_FPIR_S;
- if (c->isa_level & (MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
- MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 |
- MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6))
- value |= MIPS_FPIR_F64 | MIPS_FPIR_L | MIPS_FPIR_W;
- if (c->options & MIPS_CPU_NAN_2008)
- value |= MIPS_FPIR_HAS2008;
- c->fpu_id = value;
-}
-
-/* Determined FPU emulator mask to use for the boot CPU with "nofpu". */
-static unsigned int mips_nofpu_msk31;
-
-/*
- * Set options for FPU hardware.
- */
-static void cpu_set_fpu_opts(struct cpuinfo_mips *c)
-{
- c->fpu_id = cpu_get_fpu_id();
- mips_nofpu_msk31 = c->fpu_msk31;
-
- if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
- MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
- MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 |
- MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) {
- if (c->fpu_id & MIPS_FPIR_3D)
- c->ases |= MIPS_ASE_MIPS3D;
- if (c->fpu_id & MIPS_FPIR_UFRP)
- c->options |= MIPS_CPU_UFR;
- if (c->fpu_id & MIPS_FPIR_FREP)
- c->options |= MIPS_CPU_FRE;
- }
-
- cpu_set_fpu_fcsr_mask(c);
- cpu_set_fpu_2008(c);
- cpu_set_nan_2008(c);
-}
-
-/*
- * Set options for the FPU emulator.
- */
-static void cpu_set_nofpu_opts(struct cpuinfo_mips *c)
-{
- c->options &= ~MIPS_CPU_FPU;
- c->fpu_msk31 = mips_nofpu_msk31;
-
- cpu_set_nofpu_2008(c);
- cpu_set_nan_2008(c);
- cpu_set_nofpu_id(c);
-}
-
-static int mips_fpu_disabled;
-
-static int __init fpu_disable(char *s)
-{
- cpu_set_nofpu_opts(&boot_cpu_data);
- mips_fpu_disabled = 1;
-
- return 1;
-}
-
-__setup("nofpu", fpu_disable);
-
-#else /* !CONFIG_MIPS_FP_SUPPORT */
-
-#define mips_fpu_disabled 1
-
-static inline unsigned long cpu_get_fpu_id(void)
-{
- return FPIR_IMP_NONE;
-}
-
-static inline int __cpu_has_fpu(void)
-{
- return 0;
-}
-
-static void cpu_set_fpu_opts(struct cpuinfo_mips *c)
-{
- /* no-op */
-}
-
-static void cpu_set_nofpu_opts(struct cpuinfo_mips *c)
-{
- /* no-op */
-}
-
-#endif /* CONFIG_MIPS_FP_SUPPORT */
-
static inline unsigned long cpu_get_msa_id(void)
{
unsigned long status, msa_id;
@@ -1600,8 +1278,9 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
c->options = MIPS_CPU_TLB | MIPS_CPU_4K_CACHE | MIPS_CPU_4KEX |
MIPS_CPU_FPU | MIPS_CPU_32FPR |
MIPS_CPU_COUNTER | MIPS_CPU_WATCH |
- MIPS_CPU_LLSC | MIPS_CPU_BP_GHIST;
+ MIPS_CPU_LLSC;
c->tlbsize = 64;
+ write_c0_r10k_diag(read_c0_r10k_diag() | R10K_DIAG_E_GHIST);
break;
case PRID_IMP_R14000:
if (((c->processor_id >> 4) & 0x0f) > 2) {
@@ -1615,8 +1294,9 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
c->options = MIPS_CPU_TLB | MIPS_CPU_4K_CACHE | MIPS_CPU_4KEX |
MIPS_CPU_FPU | MIPS_CPU_32FPR |
MIPS_CPU_COUNTER | MIPS_CPU_WATCH |
- MIPS_CPU_LLSC | MIPS_CPU_BP_GHIST;
+ MIPS_CPU_LLSC;
c->tlbsize = 64;
+ write_c0_r10k_diag(read_c0_r10k_diag() | R10K_DIAG_E_GHIST);
break;
case PRID_IMP_LOONGSON_64C: /* Loongson-2/3 */
switch (c->processor_id & PRID_REV_MASK) {
@@ -2123,7 +1803,10 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu)
/* XBurst does not implement the CP0 counter. */
c->options &= ~MIPS_CPU_COUNTER;
- BUG_ON(!__builtin_constant_p(cpu_has_counter) || cpu_has_counter);
+ BUG_ON(__builtin_constant_p(cpu_has_counter) && cpu_has_counter);
+
+ /* XBurst has virtually tagged icache */
+ c->icache.flags |= MIPS_CACHE_VTAG;
switch (c->processor_id & PRID_IMP_MASK) {
@@ -2169,8 +1852,9 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu)
/* XBurst®1 with MXU2.0 SIMD ISA */
case PRID_IMP_XBURST_REV2:
+ /* Ingenic uses the WA bit to achieve write-combine memory writes */
+ c->writecombine = _CACHE_CACHABLE_WA;
c->cputype = CPU_XBURST;
- c->writecombine = _CACHE_UNCACHED_ACCELERATED;
__cpu_name[cpu] = "Ingenic XBurst";
break;
@@ -2372,10 +2056,6 @@ void cpu_probe(void)
else
cpu_set_nofpu_opts(c);
- if (cpu_has_bp_ghist)
- write_c0_r10k_diag(read_c0_r10k_diag() |
- R10K_DIAG_E_GHIST);
-
if (cpu_has_mips_r2_r6) {
c->srsets = ((read_c0_srsctl() >> 26) & 0x0f) + 1;
/* R2 has Performance Counter Interrupt indicator */
diff --git a/arch/mips/kernel/cpu-r3k-probe.c b/arch/mips/kernel/cpu-r3k-probe.c
new file mode 100644
index 000000000000..abdbbe8c5a43
--- /dev/null
+++ b/arch/mips/kernel/cpu-r3k-probe.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Processor capabilities determination functions.
+ *
+ * Copyright (C) xxxx the Anonymous
+ * Copyright (C) 1994 - 2006 Ralf Baechle
+ * Copyright (C) 2003, 2004 Maciej W. Rozycki
+ * Copyright (C) 2001, 2004, 2011, 2012 MIPS Technologies, Inc.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/ptrace.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/export.h>
+
+#include <asm/bugs.h>
+#include <asm/cpu.h>
+#include <asm/cpu-features.h>
+#include <asm/cpu-type.h>
+#include <asm/fpu.h>
+#include <asm/mipsregs.h>
+#include <asm/elf.h>
+
+#include "fpu-probe.h"
+
+/* Hardware capabilities */
+unsigned int elf_hwcap __read_mostly;
+EXPORT_SYMBOL_GPL(elf_hwcap);
+
+void __init check_bugs32(void)
+{
+
+}
+
+/*
+ * Probe whether cpu has config register by trying to play with
+ * alternate cache bit and see whether it matters.
+ * It's used by cpu_probe to distinguish between R3000A and R3081.
+ */
+static inline int cpu_has_confreg(void)
+{
+#ifdef CONFIG_CPU_R3000
+ extern unsigned long r3k_cache_size(unsigned long);
+ unsigned long size1, size2;
+ unsigned long cfg = read_c0_conf();
+
+ size1 = r3k_cache_size(ST0_ISC);
+ write_c0_conf(cfg ^ R30XX_CONF_AC);
+ size2 = r3k_cache_size(ST0_ISC);
+ write_c0_conf(cfg);
+ return size1 != size2;
+#else
+ return 0;
+#endif
+}
+
+static inline void set_elf_platform(int cpu, const char *plat)
+{
+ if (cpu == 0)
+ __elf_platform = plat;
+}
+
+const char *__cpu_name[NR_CPUS];
+const char *__elf_platform;
+const char *__elf_base_platform;
+
+void cpu_probe(void)
+{
+ struct cpuinfo_mips *c = &current_cpu_data;
+ unsigned int cpu = smp_processor_id();
+
+ /*
+ * Set a default elf platform, cpu probe may later
+ * overwrite it with a more precise value
+ */
+ set_elf_platform(cpu, "mips");
+
+ c->processor_id = PRID_IMP_UNKNOWN;
+ c->fpu_id = FPIR_IMP_NONE;
+ c->cputype = CPU_UNKNOWN;
+ c->writecombine = _CACHE_UNCACHED;
+
+ c->fpu_csr31 = FPU_CSR_RN;
+ c->fpu_msk31 = FPU_CSR_RSVD | FPU_CSR_ABS2008 | FPU_CSR_NAN2008 |
+ FPU_CSR_CONDX | FPU_CSR_FS;
+
+ c->srsets = 1;
+
+ c->processor_id = read_c0_prid();
+ switch (c->processor_id & (PRID_COMP_MASK | PRID_IMP_MASK)) {
+ case PRID_COMP_LEGACY | PRID_IMP_R2000:
+ c->cputype = CPU_R2000;
+ __cpu_name[cpu] = "R2000";
+ c->options = MIPS_CPU_TLB | MIPS_CPU_3K_CACHE |
+ MIPS_CPU_NOFPUEX;
+ if (__cpu_has_fpu())
+ c->options |= MIPS_CPU_FPU;
+ c->tlbsize = 64;
+ break;
+ case PRID_COMP_LEGACY | PRID_IMP_R3000:
+ if ((c->processor_id & PRID_REV_MASK) == PRID_REV_R3000A) {
+ if (cpu_has_confreg()) {
+ c->cputype = CPU_R3081E;
+ __cpu_name[cpu] = "R3081";
+ } else {
+ c->cputype = CPU_R3000A;
+ __cpu_name[cpu] = "R3000A";
+ }
+ } else {
+ c->cputype = CPU_R3000;
+ __cpu_name[cpu] = "R3000";
+ }
+ c->options = MIPS_CPU_TLB | MIPS_CPU_3K_CACHE |
+ MIPS_CPU_NOFPUEX;
+ if (__cpu_has_fpu())
+ c->options |= MIPS_CPU_FPU;
+ c->tlbsize = 64;
+ break;
+ case PRID_COMP_LEGACY | PRID_IMP_TX39:
+ c->options = MIPS_CPU_TLB | MIPS_CPU_TX39_CACHE;
+
+ if ((c->processor_id & 0xf0) == (PRID_REV_TX3927 & 0xf0)) {
+ c->cputype = CPU_TX3927;
+ __cpu_name[cpu] = "TX3927";
+ c->tlbsize = 64;
+ } else {
+ switch (c->processor_id & PRID_REV_MASK) {
+ case PRID_REV_TX3912:
+ c->cputype = CPU_TX3912;
+ __cpu_name[cpu] = "TX3912";
+ c->tlbsize = 32;
+ break;
+ case PRID_REV_TX3922:
+ c->cputype = CPU_TX3922;
+ __cpu_name[cpu] = "TX3922";
+ c->tlbsize = 64;
+ break;
+ }
+ }
+ break;
+ }
+
+ BUG_ON(!__cpu_name[cpu]);
+ BUG_ON(c->cputype == CPU_UNKNOWN);
+
+ /*
+ * Platform code can force the cpu type to optimize code
+ * generation. In that case be sure the cpu type is correctly
+ * manually setup otherwise it could trigger some nasty bugs.
+ */
+ BUG_ON(current_cpu_type() != c->cputype);
+
+ if (mips_fpu_disabled)
+ c->options &= ~MIPS_CPU_FPU;
+
+ if (c->options & MIPS_CPU_FPU)
+ cpu_set_fpu_opts(c);
+ else
+ cpu_set_nofpu_opts(c);
+}
+
+void cpu_report(void)
+{
+ struct cpuinfo_mips *c = &current_cpu_data;
+
+ pr_info("CPU%d revision is: %08x (%s)\n",
+ smp_processor_id(), c->processor_id, cpu_name_string());
+ if (c->options & MIPS_CPU_FPU)
+ pr_info("FPU revision is: %08x\n", c->fpu_id);
+}
diff --git a/arch/mips/kernel/fpu-probe.c b/arch/mips/kernel/fpu-probe.c
new file mode 100644
index 000000000000..e689d6a83234
--- /dev/null
+++ b/arch/mips/kernel/fpu-probe.c
@@ -0,0 +1,321 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Processor capabilities determination functions.
+ *
+ * Copyright (C) xxxx the Anonymous
+ * Copyright (C) 1994 - 2006 Ralf Baechle
+ * Copyright (C) 2003, 2004 Maciej W. Rozycki
+ * Copyright (C) 2001, 2004, 2011, 2012 MIPS Technologies, Inc.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+
+#include <asm/bugs.h>
+#include <asm/cpu.h>
+#include <asm/cpu-features.h>
+#include <asm/cpu-type.h>
+#include <asm/elf.h>
+#include <asm/fpu.h>
+#include <asm/mipsregs.h>
+
+#include "fpu-probe.h"
+
+/*
+ * Get the FPU Implementation/Revision.
+ */
+static inline unsigned long cpu_get_fpu_id(void)
+{
+ unsigned long tmp, fpu_id;
+
+ tmp = read_c0_status();
+ __enable_fpu(FPU_AS_IS);
+ fpu_id = read_32bit_cp1_register(CP1_REVISION);
+ write_c0_status(tmp);
+ return fpu_id;
+}
+
+/*
+ * Check if the CPU has an external FPU.
+ */
+int __cpu_has_fpu(void)
+{
+ return (cpu_get_fpu_id() & FPIR_IMP_MASK) != FPIR_IMP_NONE;
+}
+
+/*
+ * Determine the FCSR mask for FPU hardware.
+ */
+static inline void cpu_set_fpu_fcsr_mask(struct cpuinfo_mips *c)
+{
+ unsigned long sr, mask, fcsr, fcsr0, fcsr1;
+
+ fcsr = c->fpu_csr31;
+ mask = FPU_CSR_ALL_X | FPU_CSR_ALL_E | FPU_CSR_ALL_S | FPU_CSR_RM;
+
+ sr = read_c0_status();
+ __enable_fpu(FPU_AS_IS);
+
+ fcsr0 = fcsr & mask;
+ write_32bit_cp1_register(CP1_STATUS, fcsr0);
+ fcsr0 = read_32bit_cp1_register(CP1_STATUS);
+
+ fcsr1 = fcsr | ~mask;
+ write_32bit_cp1_register(CP1_STATUS, fcsr1);
+ fcsr1 = read_32bit_cp1_register(CP1_STATUS);
+
+ write_32bit_cp1_register(CP1_STATUS, fcsr);
+
+ write_c0_status(sr);
+
+ c->fpu_msk31 = ~(fcsr0 ^ fcsr1) & ~mask;
+}
+
+/*
+ * Determine the IEEE 754 NaN encodings and ABS.fmt/NEG.fmt execution modes
+ * supported by FPU hardware.
+ */
+static void cpu_set_fpu_2008(struct cpuinfo_mips *c)
+{
+ if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
+ MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
+ MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 |
+ MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) {
+ unsigned long sr, fir, fcsr, fcsr0, fcsr1;
+
+ sr = read_c0_status();
+ __enable_fpu(FPU_AS_IS);
+
+ fir = read_32bit_cp1_register(CP1_REVISION);
+ if (fir & MIPS_FPIR_HAS2008) {
+ fcsr = read_32bit_cp1_register(CP1_STATUS);
+
+ /*
+ * MAC2008 toolchain never landed in real world, so
+ * we're only testing whether it can be disabled and
+ * don't try to enabled it.
+ */
+ fcsr0 = fcsr & ~(FPU_CSR_ABS2008 | FPU_CSR_NAN2008 |
+ FPU_CSR_MAC2008);
+ write_32bit_cp1_register(CP1_STATUS, fcsr0);
+ fcsr0 = read_32bit_cp1_register(CP1_STATUS);
+
+ fcsr1 = fcsr | FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+ write_32bit_cp1_register(CP1_STATUS, fcsr1);
+ fcsr1 = read_32bit_cp1_register(CP1_STATUS);
+
+ write_32bit_cp1_register(CP1_STATUS, fcsr);
+
+ if (c->isa_level & (MIPS_CPU_ISA_M32R2 |
+ MIPS_CPU_ISA_M64R2)) {
+ /*
+ * The bit for MAC2008 might be reused by R6
+ * in future, so we only test for R2-R5.
+ */
+ if (fcsr0 & FPU_CSR_MAC2008)
+ c->options |= MIPS_CPU_MAC_2008_ONLY;
+ }
+
+ if (!(fcsr0 & FPU_CSR_NAN2008))
+ c->options |= MIPS_CPU_NAN_LEGACY;
+ if (fcsr1 & FPU_CSR_NAN2008)
+ c->options |= MIPS_CPU_NAN_2008;
+
+ if ((fcsr0 ^ fcsr1) & FPU_CSR_ABS2008)
+ c->fpu_msk31 &= ~FPU_CSR_ABS2008;
+ else
+ c->fpu_csr31 |= fcsr & FPU_CSR_ABS2008;
+
+ if ((fcsr0 ^ fcsr1) & FPU_CSR_NAN2008)
+ c->fpu_msk31 &= ~FPU_CSR_NAN2008;
+ else
+ c->fpu_csr31 |= fcsr & FPU_CSR_NAN2008;
+ } else {
+ c->options |= MIPS_CPU_NAN_LEGACY;
+ }
+
+ write_c0_status(sr);
+ } else {
+ c->options |= MIPS_CPU_NAN_LEGACY;
+ }
+}
+
+/*
+ * IEEE 754 conformance mode to use. Affects the NaN encoding and the
+ * ABS.fmt/NEG.fmt execution mode.
+ */
+static enum { STRICT, LEGACY, STD2008, RELAXED } ieee754 = STRICT;
+
+/*
+ * Set the IEEE 754 NaN encodings and the ABS.fmt/NEG.fmt execution modes
+ * to support by the FPU emulator according to the IEEE 754 conformance
+ * mode selected. Note that "relaxed" straps the emulator so that it
+ * allows 2008-NaN binaries even for legacy processors.
+ */
+static void cpu_set_nofpu_2008(struct cpuinfo_mips *c)
+{
+ c->options &= ~(MIPS_CPU_NAN_2008 | MIPS_CPU_NAN_LEGACY);
+ c->fpu_csr31 &= ~(FPU_CSR_ABS2008 | FPU_CSR_NAN2008);
+ c->fpu_msk31 &= ~(FPU_CSR_ABS2008 | FPU_CSR_NAN2008);
+
+ switch (ieee754) {
+ case STRICT:
+ if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
+ MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
+ MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 |
+ MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) {
+ c->options |= MIPS_CPU_NAN_2008 | MIPS_CPU_NAN_LEGACY;
+ } else {
+ c->options |= MIPS_CPU_NAN_LEGACY;
+ c->fpu_msk31 |= FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+ }
+ break;
+ case LEGACY:
+ c->options |= MIPS_CPU_NAN_LEGACY;
+ c->fpu_msk31 |= FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+ break;
+ case STD2008:
+ c->options |= MIPS_CPU_NAN_2008;
+ c->fpu_csr31 |= FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+ c->fpu_msk31 |= FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+ break;
+ case RELAXED:
+ c->options |= MIPS_CPU_NAN_2008 | MIPS_CPU_NAN_LEGACY;
+ break;
+ }
+}
+
+/*
+ * Override the IEEE 754 NaN encoding and ABS.fmt/NEG.fmt execution mode
+ * according to the "ieee754=" parameter.
+ */
+static void cpu_set_nan_2008(struct cpuinfo_mips *c)
+{
+ switch (ieee754) {
+ case STRICT:
+ mips_use_nan_legacy = !!cpu_has_nan_legacy;
+ mips_use_nan_2008 = !!cpu_has_nan_2008;
+ break;
+ case LEGACY:
+ mips_use_nan_legacy = !!cpu_has_nan_legacy;
+ mips_use_nan_2008 = !cpu_has_nan_legacy;
+ break;
+ case STD2008:
+ mips_use_nan_legacy = !cpu_has_nan_2008;
+ mips_use_nan_2008 = !!cpu_has_nan_2008;
+ break;
+ case RELAXED:
+ mips_use_nan_legacy = true;
+ mips_use_nan_2008 = true;
+ break;
+ }
+}
+
+/*
+ * IEEE 754 NaN encoding and ABS.fmt/NEG.fmt execution mode override
+ * settings:
+ *
+ * strict: accept binaries that request a NaN encoding supported by the FPU
+ * legacy: only accept legacy-NaN binaries
+ * 2008: only accept 2008-NaN binaries
+ * relaxed: accept any binaries regardless of whether supported by the FPU
+ */
+static int __init ieee754_setup(char *s)
+{
+ if (!s)
+ return -1;
+ else if (!strcmp(s, "strict"))
+ ieee754 = STRICT;
+ else if (!strcmp(s, "legacy"))
+ ieee754 = LEGACY;
+ else if (!strcmp(s, "2008"))
+ ieee754 = STD2008;
+ else if (!strcmp(s, "relaxed"))
+ ieee754 = RELAXED;
+ else
+ return -1;
+
+ if (!(boot_cpu_data.options & MIPS_CPU_FPU))
+ cpu_set_nofpu_2008(&boot_cpu_data);
+ cpu_set_nan_2008(&boot_cpu_data);
+
+ return 0;
+}
+
+early_param("ieee754", ieee754_setup);
+
+/*
+ * Set the FIR feature flags for the FPU emulator.
+ */
+static void cpu_set_nofpu_id(struct cpuinfo_mips *c)
+{
+ u32 value;
+
+ value = 0;
+ if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
+ MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
+ MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 |
+ MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6))
+ value |= MIPS_FPIR_D | MIPS_FPIR_S;
+ if (c->isa_level & (MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
+ MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 |
+ MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6))
+ value |= MIPS_FPIR_F64 | MIPS_FPIR_L | MIPS_FPIR_W;
+ if (c->options & MIPS_CPU_NAN_2008)
+ value |= MIPS_FPIR_HAS2008;
+ c->fpu_id = value;
+}
+
+/* Determined FPU emulator mask to use for the boot CPU with "nofpu". */
+static unsigned int mips_nofpu_msk31;
+
+/*
+ * Set options for FPU hardware.
+ */
+void cpu_set_fpu_opts(struct cpuinfo_mips *c)
+{
+ c->fpu_id = cpu_get_fpu_id();
+ mips_nofpu_msk31 = c->fpu_msk31;
+
+ if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
+ MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
+ MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 |
+ MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) {
+ if (c->fpu_id & MIPS_FPIR_3D)
+ c->ases |= MIPS_ASE_MIPS3D;
+ if (c->fpu_id & MIPS_FPIR_UFRP)
+ c->options |= MIPS_CPU_UFR;
+ if (c->fpu_id & MIPS_FPIR_FREP)
+ c->options |= MIPS_CPU_FRE;
+ }
+
+ cpu_set_fpu_fcsr_mask(c);
+ cpu_set_fpu_2008(c);
+ cpu_set_nan_2008(c);
+}
+
+/*
+ * Set options for the FPU emulator.
+ */
+void cpu_set_nofpu_opts(struct cpuinfo_mips *c)
+{
+ c->options &= ~MIPS_CPU_FPU;
+ c->fpu_msk31 = mips_nofpu_msk31;
+
+ cpu_set_nofpu_2008(c);
+ cpu_set_nan_2008(c);
+ cpu_set_nofpu_id(c);
+}
+
+int mips_fpu_disabled;
+
+static int __init fpu_disable(char *s)
+{
+ cpu_set_nofpu_opts(&boot_cpu_data);
+ mips_fpu_disabled = 1;
+
+ return 1;
+}
+
+__setup("nofpu", fpu_disable);
+
diff --git a/arch/mips/kernel/fpu-probe.h b/arch/mips/kernel/fpu-probe.h
new file mode 100644
index 000000000000..951ce50890d0
--- /dev/null
+++ b/arch/mips/kernel/fpu-probe.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <linux/kernel.h>
+
+#include <asm/cpu.h>
+#include <asm/cpu-info.h>
+
+#ifdef CONFIG_MIPS_FP_SUPPORT
+
+extern int mips_fpu_disabled;
+
+int __cpu_has_fpu(void);
+void cpu_set_fpu_opts(struct cpuinfo_mips *c);
+void cpu_set_nofpu_opts(struct cpuinfo_mips *c);
+
+#else /* !CONFIG_MIPS_FP_SUPPORT */
+
+#define mips_fpu_disabled 1
+
+static inline unsigned long cpu_get_fpu_id(void)
+{
+ return FPIR_IMP_NONE;
+}
+
+static inline int __cpu_has_fpu(void)
+{
+ return 0;
+}
+
+static inline void cpu_set_fpu_opts(struct cpuinfo_mips *c)
+{
+ /* no-op */
+}
+
+static inline void cpu_set_nofpu_opts(struct cpuinfo_mips *c)
+{
+ /* no-op */
+}
+
+#endif /* CONFIG_MIPS_FP_SUPPORT */
diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
index 2625232bfe52..f57e68f40a34 100644
--- a/arch/mips/kernel/ftrace.c
+++ b/arch/mips/kernel/ftrace.c
@@ -37,10 +37,6 @@ void arch_ftrace_update_code(int command)
ftrace_modify_all_code(command);
}
-#endif
-
-#ifdef CONFIG_DYNAMIC_FTRACE
-
#define JAL 0x0c000000 /* jump & link: ip --> ra, jump to target */
#define ADDR_MASK 0x03ffffff /* op_code|addr : 31...26|25 ....0 */
#define JUMP_RANGE_MASK ((1UL << 28) - 1)
diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S
index 7dd234e788e6..61b73580b877 100644
--- a/arch/mips/kernel/head.S
+++ b/arch/mips/kernel/head.S
@@ -35,7 +35,7 @@
.macro setup_c0_status set clr
.set push
mfc0 t0, CP0_STATUS
- or t0, ST0_CU0|\set|0x1f|\clr
+ or t0, ST0_KERNEL_CUMASK|\set|0x1f|\clr
xor t0, 0x1f|\clr
mtc0 t0, CP0_STATUS
.set noreorder
diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c
index 1a08428eedcf..6c590ef27648 100644
--- a/arch/mips/kernel/mips-mt-fpaff.c
+++ b/arch/mips/kernel/mips-mt-fpaff.c
@@ -167,7 +167,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len,
return -EINVAL;
get_online_cpus();
- read_lock(&tasklist_lock);
+ rcu_read_lock();
retval = -ESRCH;
p = find_process_by_pid(pid);
@@ -181,7 +181,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len,
cpumask_and(&mask, &allowed, cpu_active_mask);
out_unlock:
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
put_online_cpus();
if (retval)
return retval;
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index f5dc316a826a..75ebd8d7bd5d 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -52,6 +52,7 @@
#include <asm/inst.h>
#include <asm/stacktrace.h>
#include <asm/irq_regs.h>
+#include <asm/exec.h>
#ifdef CONFIG_HOTPLUG_CPU
void arch_cpu_idle_dead(void)
@@ -68,7 +69,7 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp)
unsigned long status;
/* New thread loses kernel privileges. */
- status = regs->cp0_status & ~(ST0_CU0|ST0_CU1|ST0_FR|KU_MASK);
+ status = regs->cp0_status & ~(ST0_CU0|ST0_CU1|ST0_CU2|ST0_FR|KU_MASK);
status |= KU_USER;
regs->cp0_status = status;
lose_fpu(0);
@@ -133,7 +134,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
childregs = (struct pt_regs *) childksp - 1;
/* Put the stack after the struct pt_regs. */
childksp = (unsigned long) childregs;
- p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1);
+ p->thread.cp0_status = (read_c0_status() & ~(ST0_CU2|ST0_CU1)) | ST0_KERNEL_CUMASK;
if (unlikely(p->flags & PF_KTHREAD)) {
/* kernel thread */
unsigned long status = p->thread.cp0_status;
@@ -279,7 +280,21 @@ static inline int is_ra_save_ins(union mips_instruction *ip, int *poff)
*poff = ip->i_format.simmediate / sizeof(ulong);
return 1;
}
-
+#ifdef CONFIG_CPU_LOONGSON64
+ if ((ip->loongson3_lswc2_format.opcode == swc2_op) &&
+ (ip->loongson3_lswc2_format.ls == 1) &&
+ (ip->loongson3_lswc2_format.fr == 0) &&
+ (ip->loongson3_lswc2_format.base == 29)) {
+ if (ip->loongson3_lswc2_format.rt == 31) {
+ *poff = ip->loongson3_lswc2_format.offset << 1;
+ return 1;
+ }
+ if (ip->loongson3_lswc2_format.rq == 31) {
+ *poff = (ip->loongson3_lswc2_format.offset << 1) + 1;
+ return 1;
+ }
+ }
+#endif
return 0;
#endif
}
diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c
index 9e50dc8df2f6..6abebd57b218 100644
--- a/arch/mips/kernel/prom.c
+++ b/arch/mips/kernel/prom.c
@@ -36,31 +36,6 @@ char *mips_get_machine_name(void)
}
#ifdef CONFIG_USE_OF
-void __init early_init_dt_add_memory_arch(u64 base, u64 size)
-{
- if (base >= PHYS_ADDR_MAX) {
- pr_warn("Trying to add an invalid memory region, skipped\n");
- return;
- }
-
- /* Truncate the passed memory region instead of type casting */
- if (base + size - 1 >= PHYS_ADDR_MAX || base + size < base) {
- pr_warn("Truncate memory region %llx @ %llx to size %llx\n",
- size, base, PHYS_ADDR_MAX - base);
- size = PHYS_ADDR_MAX - base;
- }
-
- add_memory_region(base, size, BOOT_MEM_RAM);
-}
-
-int __init early_init_dt_reserve_memory_arch(phys_addr_t base,
- phys_addr_t size, bool nomap)
-{
- add_memory_region(base, size,
- nomap ? BOOT_MEM_NOMAP : BOOT_MEM_RESERVED);
-
- return 0;
-}
void __init __dt_setup_arch(void *bph)
{
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index ee8636ccded2..fccdbe2e7c2b 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -91,45 +91,6 @@ unsigned long ARCH_PFN_OFFSET;
EXPORT_SYMBOL(ARCH_PFN_OFFSET);
#endif
-void __init add_memory_region(phys_addr_t start, phys_addr_t size, long type)
-{
- /*
- * Note: This function only exists for historical reason,
- * new code should use memblock_add or memblock_add_node instead.
- */
-
- /*
- * If the region reaches the top of the physical address space, adjust
- * the size slightly so that (start + size) doesn't overflow
- */
- if (start + size - 1 == PHYS_ADDR_MAX)
- --size;
-
- /* Sanity check */
- if (start + size < start) {
- pr_warn("Trying to add an invalid memory region, skipped\n");
- return;
- }
-
- if (start < PHYS_OFFSET)
- return;
-
- memblock_add(start, size);
- /* Reserve any memory except the ordinary RAM ranges. */
- switch (type) {
- case BOOT_MEM_RAM:
- break;
-
- case BOOT_MEM_NOMAP: /* Discard the range from the system. */
- memblock_remove(start, size);
- break;
-
- default: /* Reserve the rest of the memory types at boot time */
- memblock_reserve(start, size);
- break;
- }
-}
-
void __init detect_memory_region(phys_addr_t start, phys_addr_t sz_min, phys_addr_t sz_max)
{
void *dm = &detect_magic;
@@ -146,7 +107,7 @@ void __init detect_memory_region(phys_addr_t start, phys_addr_t sz_min, phys_add
((unsigned long long) sz_min) / SZ_1M,
((unsigned long long) sz_max) / SZ_1M);
- add_memory_region(start, size, BOOT_MEM_RAM);
+ memblock_add(start, size);
}
/*
@@ -396,7 +357,7 @@ static int __init early_parse_mem(char *p)
if (*p == '@')
start = memparse(p + 1, &p);
- add_memory_region(start, size, BOOT_MEM_RAM);
+ memblock_add(start, size);
return 0;
}
@@ -422,13 +383,14 @@ static int __init early_parse_memmap(char *p)
if (*p == '@') {
start_at = memparse(p+1, &p);
- add_memory_region(start_at, mem_size, BOOT_MEM_RAM);
+ memblock_add(start_at, mem_size);
} else if (*p == '#') {
pr_err("\"memmap=nn#ss\" (force ACPI data) invalid on MIPS\n");
return -EINVAL;
} else if (*p == '$') {
start_at = memparse(p+1, &p);
- add_memory_region(start_at, mem_size, BOOT_MEM_RESERVED);
+ memblock_add(start_at, mem_size);
+ memblock_reserve(start_at, mem_size);
} else {
pr_err("\"memmap\" invalid format!\n");
return -EINVAL;
@@ -443,7 +405,7 @@ static int __init early_parse_memmap(char *p)
early_param("memmap", early_parse_memmap);
#ifdef CONFIG_PROC_VMCORE
-unsigned long setup_elfcorehdr, setup_elfcorehdr_size;
+static unsigned long setup_elfcorehdr, setup_elfcorehdr_size;
static int __init early_parse_elfcorehdr(char *p)
{
phys_addr_t start, end;
@@ -472,6 +434,11 @@ early_param("elfcorehdr", early_parse_elfcorehdr);
#endif
#ifdef CONFIG_KEXEC
+
+/* 64M alignment for crash kernel regions */
+#define CRASH_ALIGN SZ_64M
+#define CRASH_ADDR_MAX SZ_512M
+
static void __init mips_parse_crashkernel(void)
{
unsigned long long total_mem;
@@ -484,9 +451,22 @@ static void __init mips_parse_crashkernel(void)
if (ret != 0 || crash_size <= 0)
return;
- if (!memblock_find_in_range(crash_base, crash_base + crash_size, crash_size, 1)) {
- pr_warn("Invalid memory region reserved for crash kernel\n");
- return;
+ if (crash_base <= 0) {
+ crash_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_MAX,
+ crash_size, CRASH_ALIGN);
+ if (!crash_base) {
+ pr_warn("crashkernel reservation failed - No suitable area found.\n");
+ return;
+ }
+ } else {
+ unsigned long long start;
+
+ start = memblock_find_in_range(crash_base, crash_base + crash_size,
+ crash_size, 1);
+ if (start != crash_base) {
+ pr_warn("Invalid memory region reserved for crash kernel\n");
+ return;
+ }
}
crashk_res.start = crash_base;
@@ -621,7 +601,7 @@ static void __init bootcmdline_init(void)
* arch_mem_init - initialize memory management subsystem
*
* o plat_mem_setup() detects the memory configuration and will record detected
- * memory areas using add_memory_region.
+ * memory areas using memblock_add.
*
* At this stage the memory configuration of the system is known to the
* kernel but generic memory management system is still entirely uninitialized.
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index a0262729cd4c..f44265025281 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -545,6 +545,12 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
return err ?: protected_restore_fp_context(sc);
}
+#ifdef CONFIG_WAR_ICACHE_REFILLS
+#define SIGMASK ~(cpu_icache_line_size()-1)
+#else
+#define SIGMASK ALMASK
+#endif
+
void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
size_t frame_size)
{
@@ -565,7 +571,7 @@ void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
sp = sigsp(sp, ksig);
- return (void __user *)((sp - frame_size) & (ICACHE_REFILLS_WORKAROUND_WAR ? ~(cpu_icache_line_size()-1) : ALMASK));
+ return (void __user *)((sp - frame_size) & SIGMASK);
}
/*
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index c333e5788664..2afa3eef486a 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -106,7 +106,7 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new)
if (unlikely(!access_ok((const void __user *)addr, 4)))
return -EINVAL;
- if (cpu_has_llsc && R10000_LLSC_WAR) {
+ if (cpu_has_llsc && IS_ENABLED(CONFIG_WAR_R10000_LLSC)) {
__asm__ __volatile__ (
" .set push \n"
" .set arch=r4000 \n"
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index cf72a0206a87..32817c954435 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -378,3 +378,4 @@
437 n32 openat2 sys_openat2
438 n32 pidfd_getfd sys_pidfd_getfd
439 n32 faccessat2 sys_faccessat2
+440 n32 process_madvise sys_process_madvise
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index 557f9954a2b9..9e4ea3c31b1c 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -354,3 +354,4 @@
437 n64 openat2 sys_openat2
438 n64 pidfd_getfd sys_pidfd_getfd
439 n64 faccessat2 sys_faccessat2
+440 n64 process_madvise sys_process_madvise
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index a17aab5abeb2..29f5f28cf5ce 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -427,3 +427,4 @@
437 o32 openat2 sys_openat2
438 o32 pidfd_getfd sys_pidfd_getfd
439 o32 faccessat2 sys_faccessat2
+440 o32 process_madvise sys_process_madvise
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index cf788591f091..e0352958e2f7 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -2204,7 +2204,7 @@ static void configure_status(void)
* flag that some firmware may have left set and the TS bit (for
* IP27). Set XX for ISA IV code to work.
*/
- unsigned int status_set = ST0_CU0;
+ unsigned int status_set = ST0_KERNEL_CUMASK;
#ifdef CONFIG_64BIT
status_set |= ST0_FR|ST0_KX|ST0_SX|ST0_UX;
#endif
diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c
index b10342018d19..917fac1636b7 100644
--- a/arch/mips/lantiq/xway/sysctrl.c
+++ b/arch/mips/lantiq/xway/sysctrl.c
@@ -112,11 +112,15 @@ static u32 pmu_clk_cr_b[] = {
#define PMU_PPE_DP BIT(23)
#define PMU_PPE_DPLUS BIT(24)
#define PMU_USB1_P BIT(26)
+#define PMU_GPHY3 BIT(26) /* grx390 */
#define PMU_USB1 BIT(27)
#define PMU_SWITCH BIT(28)
#define PMU_PPE_TOP BIT(29)
+#define PMU_GPHY0 BIT(29) /* ar10, xrx390 */
#define PMU_GPHY BIT(30)
+#define PMU_GPHY1 BIT(30) /* ar10, xrx390 */
#define PMU_PCIE_CLK BIT(31)
+#define PMU_GPHY2 BIT(31) /* ar10, xrx390 */
#define PMU1_PCIE_PHY BIT(0) /* vr9-specific,moved in ar10/grx390 */
#define PMU1_PCIE_CTL BIT(1)
@@ -465,6 +469,9 @@ void __init ltq_soc_init(void)
if (of_machine_is_compatible("lantiq,grx390") ||
of_machine_is_compatible("lantiq,ar10")) {
+ clkdev_add_pmu("1e108000.switch", "gphy0", 0, 0, PMU_GPHY0);
+ clkdev_add_pmu("1e108000.switch", "gphy1", 0, 0, PMU_GPHY1);
+ clkdev_add_pmu("1e108000.switch", "gphy2", 0, 0, PMU_GPHY2);
clkdev_add_pmu("1f203018.usb2-phy", "phy", 1, 2, PMU_ANALOG_USB0_P);
clkdev_add_pmu("1f203034.usb2-phy", "phy", 1, 2, PMU_ANALOG_USB1_P);
/* rc 0 */
@@ -496,6 +503,7 @@ void __init ltq_soc_init(void)
} else if (of_machine_is_compatible("lantiq,grx390")) {
clkdev_add_static(ltq_grx390_cpu_hz(), ltq_grx390_fpi_hz(),
ltq_grx390_fpi_hz(), ltq_grx390_pp32_hz());
+ clkdev_add_pmu("1e108000.switch", "gphy3", 0, 0, PMU_GPHY3);
clkdev_add_pmu("1e101000.usb", "otg", 1, 0, PMU_USB0);
clkdev_add_pmu("1e106000.usb", "otg", 1, 0, PMU_USB1);
/* rc 2 */
@@ -514,8 +522,6 @@ void __init ltq_soc_init(void)
clkdev_add_pmu("1e10b308.eth", NULL, 0, 0, PMU_SWITCH |
PMU_PPE_DP | PMU_PPE_TC);
clkdev_add_pmu("1da00000.usif", "NULL", 1, 0, PMU_USIF);
- clkdev_add_pmu("1e108000.switch", "gphy0", 0, 0, PMU_GPHY);
- clkdev_add_pmu("1e108000.switch", "gphy1", 0, 0, PMU_GPHY);
clkdev_add_pmu("1e103100.deu", NULL, 1, 0, PMU_DEU);
clkdev_add_pmu("1e116000.mei", "afe", 1, 2, PMU_ANALOG_DSL_AFE);
clkdev_add_pmu("1e116000.mei", "dfe", 1, 0, PMU_DFE);
diff --git a/arch/mips/loongson2ef/common/mem.c b/arch/mips/loongson2ef/common/mem.c
index ae21f1c62baa..057d58bb470e 100644
--- a/arch/mips/loongson2ef/common/mem.c
+++ b/arch/mips/loongson2ef/common/mem.c
@@ -17,10 +17,7 @@ u32 memsize, highmemsize;
void __init prom_init_memory(void)
{
- add_memory_region(0x0, (memsize << 20), BOOT_MEM_RAM);
-
- add_memory_region(memsize << 20, LOONGSON_PCI_MEM_START - (memsize <<
- 20), BOOT_MEM_RESERVED);
+ memblock_add(0x0, (memsize << 20));
#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG
{
@@ -41,12 +38,7 @@ void __init prom_init_memory(void)
#ifdef CONFIG_64BIT
if (highmemsize > 0)
- add_memory_region(LOONGSON_HIGHMEM_START,
- highmemsize << 20, BOOT_MEM_RAM);
-
- add_memory_region(LOONGSON_PCI_MEM_END + 1, LOONGSON_HIGHMEM_START -
- LOONGSON_PCI_MEM_END - 1, BOOT_MEM_RESERVED);
-
+ memblock_add(LOONGSON_HIGHMEM_START, highmemsize << 20);
#endif /* !CONFIG_64BIT */
}
diff --git a/arch/mips/loongson32/common/prom.c b/arch/mips/loongson32/common/prom.c
index fd76114fa3b0..c133b5adf34e 100644
--- a/arch/mips/loongson32/common/prom.c
+++ b/arch/mips/loongson32/common/prom.c
@@ -7,8 +7,8 @@
#include <linux/io.h>
#include <linux/init.h>
+#include <linux/memblock.h>
#include <linux/serial_reg.h>
-#include <asm/bootinfo.h>
#include <asm/fw/fw.h>
#include <loongson1.h>
@@ -42,5 +42,5 @@ void __init prom_free_prom_memory(void)
void __init plat_mem_setup(void)
{
- add_memory_region(0x0, (memsize << 20), BOOT_MEM_RAM);
+ memblock_add(0x0, (memsize << 20));
}
diff --git a/arch/mips/loongson64/numa.c b/arch/mips/loongson64/numa.c
index ea8bb1bc667e..cf9459f79f9b 100644
--- a/arch/mips/loongson64/numa.c
+++ b/arch/mips/loongson64/numa.c
@@ -98,27 +98,6 @@ static void __init init_topology_matrix(void)
}
}
-static unsigned long nid_to_addroffset(unsigned int nid)
-{
- unsigned long result;
- switch (nid) {
- case 0:
- default:
- result = NODE0_ADDRSPACE_OFFSET;
- break;
- case 1:
- result = NODE1_ADDRSPACE_OFFSET;
- break;
- case 2:
- result = NODE2_ADDRSPACE_OFFSET;
- break;
- case 3:
- result = NODE3_ADDRSPACE_OFFSET;
- break;
- }
- return result;
-}
-
static void __init szmem(unsigned int node)
{
u32 i, mem_type;
@@ -146,7 +125,7 @@ static void __init szmem(unsigned int node)
pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n",
start_pfn, end_pfn, num_physpages);
memblock_add_node(PFN_PHYS(start_pfn),
- PFN_PHYS(end_pfn - start_pfn), node);
+ PFN_PHYS(node_psize), node);
break;
case SYSTEM_RAM_HIGH:
start_pfn = ((node_id << 44) + mem_start) >> PAGE_SHIFT;
@@ -158,7 +137,7 @@ static void __init szmem(unsigned int node)
pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n",
start_pfn, end_pfn, num_physpages);
memblock_add_node(PFN_PHYS(start_pfn),
- PFN_PHYS(end_pfn - start_pfn), node);
+ PFN_PHYS(node_psize), node);
break;
case SYSTEM_RAM_RESERVED:
pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n",
@@ -175,7 +154,7 @@ static void __init node_mem_init(unsigned int node)
unsigned long node_addrspace_offset;
unsigned long start_pfn, end_pfn;
- node_addrspace_offset = nid_to_addroffset(node);
+ node_addrspace_offset = nid_to_addrbase(node);
pr_info("Node%d's addrspace_offset is 0x%lx\n",
node, node_addrspace_offset);
@@ -242,9 +221,7 @@ void __init paging_init(void)
unsigned long zones_size[MAX_NR_ZONES] = {0, };
pagetable_init();
-#ifdef CONFIG_ZONE_DMA32
zones_size[ZONE_DMA32] = MAX_DMA32_PFN;
-#endif
zones_size[ZONE_NORMAL] = max_low_pfn;
free_area_init(zones_size);
}
diff --git a/arch/mips/loongson64/reset.c b/arch/mips/loongson64/reset.c
index bc7671079f0c..3bb8a1ed9348 100644
--- a/arch/mips/loongson64/reset.c
+++ b/arch/mips/loongson64/reset.c
@@ -15,11 +15,6 @@
#include <loongson.h>
#include <boot_param.h>
-static inline void loongson_reboot(void)
-{
- ((void (*)(void))ioremap(LOONGSON_BOOT_BASE, 4)) ();
-}
-
static void loongson_restart(char *command)
{
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 0ef717093262..9cede7ce37e6 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -130,9 +130,10 @@ struct bcache_ops *bcops = &no_sc_ops;
#define R4600_HIT_CACHEOP_WAR_IMPL \
do { \
- if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) \
+ if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) && \
+ cpu_is_r4600_v2_x()) \
*(volatile unsigned long *)CKSEG1; \
- if (R4600_V1_HIT_CACHEOP_WAR) \
+ if (IS_ENABLED(CONFIG_WAR_R4600_V1_HIT_CACHEOP)) \
__asm__ __volatile__("nop;nop;nop;nop"); \
} while (0)
@@ -238,7 +239,7 @@ static void r4k_blast_dcache_setup(void)
r4k_blast_dcache = blast_dcache128;
}
-/* force code alignment (used for TX49XX_ICACHE_INDEX_INV_WAR) */
+/* force code alignment (used for CONFIG_WAR_TX49XX_ICACHE_INDEX_INV) */
#define JUMP_TO_ALIGN(order) \
__asm__ __volatile__( \
"b\t1f\n\t" \
@@ -366,10 +367,11 @@ static void r4k_blast_icache_page_indexed_setup(void)
else if (ic_lsize == 16)
r4k_blast_icache_page_indexed = blast_icache16_page_indexed;
else if (ic_lsize == 32) {
- if (R4600_V1_INDEX_ICACHEOP_WAR && cpu_is_r4600_v1_x())
+ if (IS_ENABLED(CONFIG_WAR_R4600_V1_INDEX_ICACHEOP) &&
+ cpu_is_r4600_v1_x())
r4k_blast_icache_page_indexed =
blast_icache32_r4600_v1_page_indexed;
- else if (TX49XX_ICACHE_INDEX_INV_WAR)
+ else if (IS_ENABLED(CONFIG_WAR_TX49XX_ICACHE_INDEX_INV))
r4k_blast_icache_page_indexed =
tx49_blast_icache32_page_indexed;
else if (current_cpu_type() == CPU_LOONGSON2EF)
@@ -394,9 +396,10 @@ static void r4k_blast_icache_setup(void)
else if (ic_lsize == 16)
r4k_blast_icache = blast_icache16;
else if (ic_lsize == 32) {
- if (R4600_V1_INDEX_ICACHEOP_WAR && cpu_is_r4600_v1_x())
+ if (IS_ENABLED(CONFIG_WAR_R4600_V1_INDEX_ICACHEOP) &&
+ cpu_is_r4600_v1_x())
r4k_blast_icache = blast_r4600_v1_icache32;
- else if (TX49XX_ICACHE_INDEX_INV_WAR)
+ else if (IS_ENABLED(CONFIG_WAR_TX49XX_ICACHE_INDEX_INV))
r4k_blast_icache = tx49_blast_icache32;
else if (current_cpu_type() == CPU_LOONGSON2EF)
r4k_blast_icache = loongson2_blast_icache32;
diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
index cd805b005509..504bc4047c4c 100644
--- a/arch/mips/mm/page.c
+++ b/arch/mips/mm/page.c
@@ -250,14 +250,16 @@ static inline void build_clear_pref(u32 **buf, int off)
if (cpu_has_cache_cdex_s) {
uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
} else if (cpu_has_cache_cdex_p) {
- if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
+ if (IS_ENABLED(CONFIG_WAR_R4600_V1_HIT_CACHEOP) &&
+ cpu_is_r4600_v1_x()) {
uasm_i_nop(buf);
uasm_i_nop(buf);
uasm_i_nop(buf);
uasm_i_nop(buf);
}
- if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
+ if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) &&
+ cpu_is_r4600_v2_x())
uasm_i_lw(buf, ZERO, ZERO, AT);
uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
@@ -302,7 +304,7 @@ void build_clear_page(void)
else
uasm_i_ori(&buf, A2, A0, off);
- if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
+ if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) && cpu_is_r4600_v2_x())
uasm_i_lui(&buf, AT, uasm_rel_hi(0xa0000000));
off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size)
@@ -402,14 +404,16 @@ static inline void build_copy_store_pref(u32 **buf, int off)
if (cpu_has_cache_cdex_s) {
uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
} else if (cpu_has_cache_cdex_p) {
- if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
+ if (IS_ENABLED(CONFIG_WAR_R4600_V1_HIT_CACHEOP) &&
+ cpu_is_r4600_v1_x()) {
uasm_i_nop(buf);
uasm_i_nop(buf);
uasm_i_nop(buf);
uasm_i_nop(buf);
}
- if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
+ if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) &&
+ cpu_is_r4600_v2_x())
uasm_i_lw(buf, ZERO, ZERO, AT);
uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
@@ -453,7 +457,7 @@ void build_copy_page(void)
else
uasm_i_ori(&buf, A2, A0, off);
- if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
+ if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) && cpu_is_r4600_v2_x())
uasm_i_lui(&buf, AT, uasm_rel_hi(0xa0000000));
off = cache_line_size ? min(8, pref_bias_copy_load / cache_line_size) *
diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c
index 97dc0511e63f..dd0a5becaabd 100644
--- a/arch/mips/mm/sc-mips.c
+++ b/arch/mips/mm/sc-mips.c
@@ -228,6 +228,7 @@ static inline int __init mips_sc_probe(void)
* contradicted by all documentation.
*/
case MACH_INGENIC_JZ4770:
+ case MACH_INGENIC_JZ4775:
c->scache.ways = 4;
break;
@@ -236,6 +237,7 @@ static inline int __init mips_sc_probe(void)
* but that is contradicted by all documentation.
*/
case MACH_INGENIC_X1000:
+ case MACH_INGENIC_X1000E:
c->scache.sets = 256;
c->scache.ways = 4;
break;
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 14f8ba93367f..a7521b8f7658 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -83,14 +83,18 @@ static inline int r4k_250MHZhwbug(void)
return 0;
}
+extern int sb1250_m3_workaround_needed(void);
+
static inline int __maybe_unused bcm1250_m3_war(void)
{
- return BCM1250_M3_WAR;
+ if (IS_ENABLED(CONFIG_SB1_PASS_2_WORKAROUNDS))
+ return sb1250_m3_workaround_needed();
+ return 0;
}
static inline int __maybe_unused r10000_llsc_war(void)
{
- return R10000_LLSC_WAR;
+ return IS_ENABLED(CONFIG_WAR_R10000_LLSC);
}
static int use_bbit_insns(void)
diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c
index c56f129c9a4b..81dd226d6b6b 100644
--- a/arch/mips/mm/uasm.c
+++ b/arch/mips/mm/uasm.c
@@ -394,7 +394,7 @@ I_u2u1u3(_lddir)
void uasm_i_pref(u32 **buf, unsigned int a, signed int b,
unsigned int c)
{
- if (CAVIUM_OCTEON_DCACHE_PREFETCH_WAR && a <= 24 && a != 5)
+ if (OCTEON_IS_MODEL(OCTEON_CN6XXX) && a <= 24 && a != 5)
/*
* As per erratum Core-14449, replace prefetches 0-4,
* 6-24 with 'pref 28'.
diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c
index c4ad5a9b4bc1..e1fb8b534944 100644
--- a/arch/mips/mti-malta/malta-setup.c
+++ b/arch/mips/mti-malta/malta-setup.c
@@ -16,7 +16,6 @@
#include <asm/dma-coherence.h>
#include <asm/fw/fw.h>
-#include <asm/mach-malta/malta-dtshim.h>
#include <asm/mips-cps.h>
#include <asm/mips-boards/generic.h>
#include <asm/mips-boards/malta.h>
diff --git a/arch/mips/netlogic/xlp/setup.c b/arch/mips/netlogic/xlp/setup.c
index 6e3102bcd2f1..9adc0c1b4ffc 100644
--- a/arch/mips/netlogic/xlp/setup.c
+++ b/arch/mips/netlogic/xlp/setup.c
@@ -89,7 +89,7 @@ static void __init xlp_init_mem_from_bars(void)
if (map[i] > 0x10000000 && map[i] < 0x20000000)
map[i] = 0x20000000;
- add_memory_region(map[i], map[i+1] - map[i], BOOT_MEM_RAM);
+ memblock_add(map[i], map[i+1] - map[i]);
}
}
diff --git a/arch/mips/netlogic/xlr/setup.c b/arch/mips/netlogic/xlr/setup.c
index 72ceddc9a03f..627e88101316 100644
--- a/arch/mips/netlogic/xlr/setup.c
+++ b/arch/mips/netlogic/xlr/setup.c
@@ -34,6 +34,7 @@
#include <linux/kernel.h>
#include <linux/serial_8250.h>
+#include <linux/memblock.h>
#include <linux/pm.h>
#include <asm/idle.h>
@@ -149,7 +150,7 @@ static void prom_add_memory(void)
bootm = (void *)(long)nlm_prom_info.psb_mem_map;
for (i = 0; i < bootm->nr_map; i++) {
- if (bootm->map[i].type != BOOT_MEM_RAM)
+ if (bootm->map[i].type != NLM_BOOT_MEM_RAM)
continue;
start = bootm->map[i].addr;
size = bootm->map[i].size;
@@ -158,7 +159,7 @@ static void prom_add_memory(void)
if (i == 0 && start == 0 && size == 0x0c000000)
size = 0x0ff00000;
- add_memory_region(start, size - pref_backup, BOOT_MEM_RAM);
+ memblock_add(start, size - pref_backup);
}
}
diff --git a/arch/mips/pci/pci-ar2315.c b/arch/mips/pci/pci-ar2315.c
index cef4a47ab063..0b15730cef88 100644
--- a/arch/mips/pci/pci-ar2315.c
+++ b/arch/mips/pci/pci-ar2315.c
@@ -423,9 +423,8 @@ static int ar2315_pci_probe(struct platform_device *pdev)
return -EINVAL;
apc->irq = irq;
- res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
- "ar2315-pci-ctrl");
- apc->mmr_mem = devm_ioremap_resource(dev, res);
+ apc->mmr_mem = devm_platform_ioremap_resource_byname(pdev,
+ "ar2315-pci-ctrl");
if (IS_ERR(apc->mmr_mem))
return PTR_ERR(apc->mmr_mem);
diff --git a/arch/mips/pci/pci-ar71xx.c b/arch/mips/pci/pci-ar71xx.c
index a9f8e7c881bd..118760b3fa82 100644
--- a/arch/mips/pci/pci-ar71xx.c
+++ b/arch/mips/pci/pci-ar71xx.c
@@ -336,8 +336,8 @@ static int ar71xx_pci_probe(struct platform_device *pdev)
if (!apc)
return -ENOMEM;
- res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "cfg_base");
- apc->cfg_base = devm_ioremap_resource(&pdev->dev, res);
+ apc->cfg_base = devm_platform_ioremap_resource_byname(pdev,
+ "cfg_base");
if (IS_ERR(apc->cfg_base))
return PTR_ERR(apc->cfg_base);
diff --git a/arch/mips/pci/pci-ar724x.c b/arch/mips/pci/pci-ar724x.c
index 869d5c9a2f8d..807558b251ef 100644
--- a/arch/mips/pci/pci-ar724x.c
+++ b/arch/mips/pci/pci-ar724x.c
@@ -372,18 +372,15 @@ static int ar724x_pci_probe(struct platform_device *pdev)
if (!apc)
return -ENOMEM;
- res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ctrl_base");
- apc->ctrl_base = devm_ioremap_resource(&pdev->dev, res);
+ apc->ctrl_base = devm_platform_ioremap_resource_byname(pdev, "ctrl_base");
if (IS_ERR(apc->ctrl_base))
return PTR_ERR(apc->ctrl_base);
- res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "cfg_base");
- apc->devcfg_base = devm_ioremap_resource(&pdev->dev, res);
+ apc->devcfg_base = devm_platform_ioremap_resource_byname(pdev, "cfg_base");
if (IS_ERR(apc->devcfg_base))
return PTR_ERR(apc->devcfg_base);
- res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "crp_base");
- apc->crp_base = devm_ioremap_resource(&pdev->dev, res);
+ apc->crp_base = devm_platform_ioremap_resource_byname(pdev, "crp_base");
if (IS_ERR(apc->crp_base))
return PTR_ERR(apc->crp_base);
diff --git a/arch/mips/pnx833x/Makefile b/arch/mips/pnx833x/Makefile
deleted file mode 100644
index 927268a58237..000000000000
--- a/arch/mips/pnx833x/Makefile
+++ /dev/null
@@ -1,4 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_SOC_PNX833X) += common/
-obj-$(CONFIG_NXP_STB220) += stb22x/
-obj-$(CONFIG_NXP_STB225) += stb22x/
diff --git a/arch/mips/pnx833x/Platform b/arch/mips/pnx833x/Platform
deleted file mode 100644
index e5286a49fc3e..000000000000
--- a/arch/mips/pnx833x/Platform
+++ /dev/null
@@ -1,4 +0,0 @@
-# NXP STB225
-cflags-$(CONFIG_SOC_PNX833X) += -I$(srctree)/arch/mips/include/asm/mach-pnx833x
-load-$(CONFIG_NXP_STB220) += 0xffffffff80001000
-load-$(CONFIG_NXP_STB225) += 0xffffffff80001000
diff --git a/arch/mips/pnx833x/common/Makefile b/arch/mips/pnx833x/common/Makefile
deleted file mode 100644
index 9b4d394112b0..000000000000
--- a/arch/mips/pnx833x/common/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-obj-y := interrupts.o platform.o prom.o setup.o reset.o
diff --git a/arch/mips/pnx833x/common/interrupts.c b/arch/mips/pnx833x/common/interrupts.c
deleted file mode 100644
index 2fbbabcac386..000000000000
--- a/arch/mips/pnx833x/common/interrupts.c
+++ /dev/null
@@ -1,303 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * interrupts.c: Interrupt mappings for PNX833X.
- *
- * Copyright 2008 NXP Semiconductors
- * Chris Steel <chris.steel@nxp.com>
- * Daniel Laird <daniel.j.laird@nxp.com>
- */
-#include <linux/kernel.h>
-#include <linux/irq.h>
-#include <linux/hardirq.h>
-#include <linux/interrupt.h>
-#include <asm/mipsregs.h>
-#include <asm/irq_cpu.h>
-#include <asm/setup.h>
-#include <irq.h>
-#include <irq-mapping.h>
-#include <gpio.h>
-
-static int mips_cpu_timer_irq;
-
-static const unsigned int irq_prio[PNX833X_PIC_NUM_IRQ] =
-{
- 0, /* unused */
- 4, /* PNX833X_PIC_I2C0_INT 1 */
- 4, /* PNX833X_PIC_I2C1_INT 2 */
- 1, /* PNX833X_PIC_UART0_INT 3 */
- 1, /* PNX833X_PIC_UART1_INT 4 */
- 6, /* PNX833X_PIC_TS_IN0_DV_INT 5 */
- 6, /* PNX833X_PIC_TS_IN0_DMA_INT 6 */
- 7, /* PNX833X_PIC_GPIO_INT 7 */
- 4, /* PNX833X_PIC_AUDIO_DEC_INT 8 */
- 5, /* PNX833X_PIC_VIDEO_DEC_INT 9 */
- 4, /* PNX833X_PIC_CONFIG_INT 10 */
- 4, /* PNX833X_PIC_AOI_INT 11 */
- 9, /* PNX833X_PIC_SYNC_INT 12 */
- 9, /* PNX8335_PIC_SATA_INT 13 */
- 4, /* PNX833X_PIC_OSD_INT 14 */
- 9, /* PNX833X_PIC_DISP1_INT 15 */
- 4, /* PNX833X_PIC_DEINTERLACER_INT 16 */
- 9, /* PNX833X_PIC_DISPLAY2_INT 17 */
- 4, /* PNX833X_PIC_VC_INT 18 */
- 4, /* PNX833X_PIC_SC_INT 19 */
- 9, /* PNX833X_PIC_IDE_INT 20 */
- 9, /* PNX833X_PIC_IDE_DMA_INT 21 */
- 6, /* PNX833X_PIC_TS_IN1_DV_INT 22 */
- 6, /* PNX833X_PIC_TS_IN1_DMA_INT 23 */
- 4, /* PNX833X_PIC_SGDX_DMA_INT 24 */
- 4, /* PNX833X_PIC_TS_OUT_INT 25 */
- 4, /* PNX833X_PIC_IR_INT 26 */
- 3, /* PNX833X_PIC_VMSP1_INT 27 */
- 3, /* PNX833X_PIC_VMSP2_INT 28 */
- 4, /* PNX833X_PIC_PIBC_INT 29 */
- 4, /* PNX833X_PIC_TS_IN0_TRD_INT 30 */
- 4, /* PNX833X_PIC_SGDX_TPD_INT 31 */
- 5, /* PNX833X_PIC_USB_INT 32 */
- 4, /* PNX833X_PIC_TS_IN1_TRD_INT 33 */
- 4, /* PNX833X_PIC_CLOCK_INT 34 */
- 4, /* PNX833X_PIC_SGDX_PARSER_INT 35 */
- 4, /* PNX833X_PIC_VMSP_DMA_INT 36 */
-#if defined(CONFIG_SOC_PNX8335)
- 4, /* PNX8335_PIC_MIU_INT 37 */
- 4, /* PNX8335_PIC_AVCHIP_IRQ_INT 38 */
- 9, /* PNX8335_PIC_SYNC_HD_INT 39 */
- 9, /* PNX8335_PIC_DISP_HD_INT 40 */
- 9, /* PNX8335_PIC_DISP_SCALER_INT 41 */
- 4, /* PNX8335_PIC_OSD_HD1_INT 42 */
- 4, /* PNX8335_PIC_DTL_WRITER_Y_INT 43 */
- 4, /* PNX8335_PIC_DTL_WRITER_C_INT 44 */
- 4, /* PNX8335_PIC_DTL_EMULATOR_Y_IR_INT 45 */
- 4, /* PNX8335_PIC_DTL_EMULATOR_C_IR_INT 46 */
- 4, /* PNX8335_PIC_DENC_TTX_INT 47 */
- 4, /* PNX8335_PIC_MMI_SIF0_INT 48 */
- 4, /* PNX8335_PIC_MMI_SIF1_INT 49 */
- 4, /* PNX8335_PIC_MMI_CDMMU_INT 50 */
- 4, /* PNX8335_PIC_PIBCS_INT 51 */
- 12, /* PNX8335_PIC_ETHERNET_INT 52 */
- 3, /* PNX8335_PIC_VMSP1_0_INT 53 */
- 3, /* PNX8335_PIC_VMSP1_1_INT 54 */
- 4, /* PNX8335_PIC_VMSP1_DMA_INT 55 */
- 4, /* PNX8335_PIC_TDGR_DE_INT 56 */
- 4, /* PNX8335_PIC_IR1_IRQ_INT 57 */
-#endif
-};
-
-static void pnx833x_timer_dispatch(void)
-{
- do_IRQ(mips_cpu_timer_irq);
-}
-
-static void pic_dispatch(void)
-{
- unsigned int irq = PNX833X_REGFIELD(PIC_INT_SRC, INT_SRC);
-
- if ((irq >= 1) && (irq < (PNX833X_PIC_NUM_IRQ))) {
- unsigned long priority = PNX833X_PIC_INT_PRIORITY;
- PNX833X_PIC_INT_PRIORITY = irq_prio[irq];
-
- if (irq == PNX833X_PIC_GPIO_INT) {
- unsigned long mask = PNX833X_PIO_INT_STATUS & PNX833X_PIO_INT_ENABLE;
- int pin;
- while ((pin = ffs(mask & 0xffff))) {
- pin -= 1;
- do_IRQ(PNX833X_GPIO_IRQ_BASE + pin);
- mask &= ~(1 << pin);
- }
- } else {
- do_IRQ(irq + PNX833X_PIC_IRQ_BASE);
- }
-
- PNX833X_PIC_INT_PRIORITY = priority;
- } else {
- printk(KERN_ERR "plat_irq_dispatch: unexpected irq %u\n", irq);
- }
-}
-
-asmlinkage void plat_irq_dispatch(void)
-{
- unsigned int pending = read_c0_status() & read_c0_cause();
-
- if (pending & STATUSF_IP4)
- pic_dispatch();
- else if (pending & STATUSF_IP7)
- do_IRQ(PNX833X_TIMER_IRQ);
- else
- spurious_interrupt();
-}
-
-static inline void pnx833x_hard_enable_pic_irq(unsigned int irq)
-{
- /* Currently we do this by setting IRQ priority to 1.
- If priority support is being implemented, 1 should be repalced
- by a better value. */
- PNX833X_PIC_INT_REG(irq) = irq_prio[irq];
-}
-
-static inline void pnx833x_hard_disable_pic_irq(unsigned int irq)
-{
- /* Disable IRQ by writing setting it's priority to 0 */
- PNX833X_PIC_INT_REG(irq) = 0;
-}
-
-static DEFINE_RAW_SPINLOCK(pnx833x_irq_lock);
-
-static unsigned int pnx833x_startup_pic_irq(unsigned int irq)
-{
- unsigned long flags;
- unsigned int pic_irq = irq - PNX833X_PIC_IRQ_BASE;
-
- raw_spin_lock_irqsave(&pnx833x_irq_lock, flags);
- pnx833x_hard_enable_pic_irq(pic_irq);
- raw_spin_unlock_irqrestore(&pnx833x_irq_lock, flags);
- return 0;
-}
-
-static void pnx833x_enable_pic_irq(struct irq_data *d)
-{
- unsigned long flags;
- unsigned int pic_irq = d->irq - PNX833X_PIC_IRQ_BASE;
-
- raw_spin_lock_irqsave(&pnx833x_irq_lock, flags);
- pnx833x_hard_enable_pic_irq(pic_irq);
- raw_spin_unlock_irqrestore(&pnx833x_irq_lock, flags);
-}
-
-static void pnx833x_disable_pic_irq(struct irq_data *d)
-{
- unsigned long flags;
- unsigned int pic_irq = d->irq - PNX833X_PIC_IRQ_BASE;
-
- raw_spin_lock_irqsave(&pnx833x_irq_lock, flags);
- pnx833x_hard_disable_pic_irq(pic_irq);
- raw_spin_unlock_irqrestore(&pnx833x_irq_lock, flags);
-}
-
-static DEFINE_RAW_SPINLOCK(pnx833x_gpio_pnx833x_irq_lock);
-
-static void pnx833x_enable_gpio_irq(struct irq_data *d)
-{
- int pin = d->irq - PNX833X_GPIO_IRQ_BASE;
- unsigned long flags;
- raw_spin_lock_irqsave(&pnx833x_gpio_pnx833x_irq_lock, flags);
- pnx833x_gpio_enable_irq(pin);
- raw_spin_unlock_irqrestore(&pnx833x_gpio_pnx833x_irq_lock, flags);
-}
-
-static void pnx833x_disable_gpio_irq(struct irq_data *d)
-{
- int pin = d->irq - PNX833X_GPIO_IRQ_BASE;
- unsigned long flags;
- raw_spin_lock_irqsave(&pnx833x_gpio_pnx833x_irq_lock, flags);
- pnx833x_gpio_disable_irq(pin);
- raw_spin_unlock_irqrestore(&pnx833x_gpio_pnx833x_irq_lock, flags);
-}
-
-static int pnx833x_set_type_gpio_irq(struct irq_data *d, unsigned int flow_type)
-{
- int pin = d->irq - PNX833X_GPIO_IRQ_BASE;
- int gpio_mode;
-
- switch (flow_type) {
- case IRQ_TYPE_EDGE_RISING:
- gpio_mode = GPIO_INT_EDGE_RISING;
- break;
- case IRQ_TYPE_EDGE_FALLING:
- gpio_mode = GPIO_INT_EDGE_FALLING;
- break;
- case IRQ_TYPE_EDGE_BOTH:
- gpio_mode = GPIO_INT_EDGE_BOTH;
- break;
- case IRQ_TYPE_LEVEL_HIGH:
- gpio_mode = GPIO_INT_LEVEL_HIGH;
- break;
- case IRQ_TYPE_LEVEL_LOW:
- gpio_mode = GPIO_INT_LEVEL_LOW;
- break;
- default:
- gpio_mode = GPIO_INT_NONE;
- break;
- }
-
- pnx833x_gpio_setup_irq(gpio_mode, pin);
-
- return 0;
-}
-
-static struct irq_chip pnx833x_pic_irq_type = {
- .name = "PNX-PIC",
- .irq_enable = pnx833x_enable_pic_irq,
- .irq_disable = pnx833x_disable_pic_irq,
-};
-
-static struct irq_chip pnx833x_gpio_irq_type = {
- .name = "PNX-GPIO",
- .irq_enable = pnx833x_enable_gpio_irq,
- .irq_disable = pnx833x_disable_gpio_irq,
- .irq_set_type = pnx833x_set_type_gpio_irq,
-};
-
-void __init arch_init_irq(void)
-{
- unsigned int irq;
-
- /* setup standard internal cpu irqs */
- mips_cpu_irq_init();
-
- /* Set IRQ information in irq_desc */
- for (irq = PNX833X_PIC_IRQ_BASE; irq < (PNX833X_PIC_IRQ_BASE + PNX833X_PIC_NUM_IRQ); irq++) {
- pnx833x_hard_disable_pic_irq(irq);
- irq_set_chip_and_handler(irq, &pnx833x_pic_irq_type,
- handle_simple_irq);
- }
-
- for (irq = PNX833X_GPIO_IRQ_BASE; irq < (PNX833X_GPIO_IRQ_BASE + PNX833X_GPIO_NUM_IRQ); irq++)
- irq_set_chip_and_handler(irq, &pnx833x_gpio_irq_type,
- handle_simple_irq);
-
- /* Set PIC priority limiter register to 0 */
- PNX833X_PIC_INT_PRIORITY = 0;
-
- /* Setup GPIO IRQ dispatching */
- pnx833x_startup_pic_irq(PNX833X_PIC_GPIO_INT);
-
- /* Enable PIC IRQs (HWIRQ2) */
- if (cpu_has_vint)
- set_vi_handler(4, pic_dispatch);
-
- write_c0_status(read_c0_status() | IE_IRQ2);
-}
-
-unsigned int get_c0_compare_int(void)
-{
- if (cpu_has_vint)
- set_vi_handler(cp0_compare_irq, pnx833x_timer_dispatch);
-
- mips_cpu_timer_irq = MIPS_CPU_IRQ_BASE + cp0_compare_irq;
- return mips_cpu_timer_irq;
-}
-
-void __init plat_time_init(void)
-{
- /* calculate mips_hpt_frequency based on PNX833X_CLOCK_CPUCP_CTL reg */
-
- extern unsigned long mips_hpt_frequency;
- unsigned long reg = PNX833X_CLOCK_CPUCP_CTL;
-
- if (!(PNX833X_BIT(reg, CLOCK_CPUCP_CTL, EXIT_RESET))) {
- /* Functional clock is disabled so use crystal frequency */
- mips_hpt_frequency = 25;
- } else {
-#if defined(CONFIG_SOC_PNX8335)
- /* Functional clock is enabled, so get clock multiplier */
- mips_hpt_frequency = 90 + (10 * PNX8335_REGFIELD(CLOCK_PLL_CPU_CTL, FREQ));
-#else
- static const unsigned long int freq[4] = {240, 160, 120, 80};
- mips_hpt_frequency = freq[PNX833X_FIELD(reg, CLOCK_CPUCP_CTL, DIV_CLOCK)];
-#endif
- }
-
- printk(KERN_INFO "CPU clock is %ld MHz\n", mips_hpt_frequency);
-
- mips_hpt_frequency *= 500000;
-}
diff --git a/arch/mips/pnx833x/common/platform.c b/arch/mips/pnx833x/common/platform.c
deleted file mode 100644
index 5fa0373f1c9e..000000000000
--- a/arch/mips/pnx833x/common/platform.c
+++ /dev/null
@@ -1,224 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * platform.c: platform support for PNX833X.
- *
- * Copyright 2008 NXP Semiconductors
- * Chris Steel <chris.steel@nxp.com>
- * Daniel Laird <daniel.j.laird@nxp.com>
- *
- * Based on software written by:
- * Nikita Youshchenko <yoush@debian.org>, based on PNX8550 code.
- */
-#include <linux/device.h>
-#include <linux/dma-mapping.h>
-#include <linux/platform_device.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/resource.h>
-#include <linux/serial.h>
-#include <linux/serial_pnx8xxx.h>
-#include <linux/mtd/platnand.h>
-
-#include <irq.h>
-#include <irq-mapping.h>
-#include <pnx833x.h>
-
-static u64 uart_dmamask = DMA_BIT_MASK(32);
-
-static struct resource pnx833x_uart_resources[] = {
- [0] = {
- .start = PNX833X_UART0_PORTS_START,
- .end = PNX833X_UART0_PORTS_END,
- .flags = IORESOURCE_MEM,
- },
- [1] = {
- .start = PNX833X_PIC_UART0_INT,
- .end = PNX833X_PIC_UART0_INT,
- .flags = IORESOURCE_IRQ,
- },
- [2] = {
- .start = PNX833X_UART1_PORTS_START,
- .end = PNX833X_UART1_PORTS_END,
- .flags = IORESOURCE_MEM,
- },
- [3] = {
- .start = PNX833X_PIC_UART1_INT,
- .end = PNX833X_PIC_UART1_INT,
- .flags = IORESOURCE_IRQ,
- },
-};
-
-struct pnx8xxx_port pnx8xxx_ports[] = {
- [0] = {
- .port = {
- .type = PORT_PNX8XXX,
- .iotype = UPIO_MEM,
- .membase = (void __iomem *)PNX833X_UART0_PORTS_START,
- .mapbase = PNX833X_UART0_PORTS_START,
- .irq = PNX833X_PIC_UART0_INT,
- .uartclk = 3692300,
- .fifosize = 16,
- .flags = UPF_BOOT_AUTOCONF,
- .line = 0,
- },
- },
- [1] = {
- .port = {
- .type = PORT_PNX8XXX,
- .iotype = UPIO_MEM,
- .membase = (void __iomem *)PNX833X_UART1_PORTS_START,
- .mapbase = PNX833X_UART1_PORTS_START,
- .irq = PNX833X_PIC_UART1_INT,
- .uartclk = 3692300,
- .fifosize = 16,
- .flags = UPF_BOOT_AUTOCONF,
- .line = 1,
- },
- },
-};
-
-static struct platform_device pnx833x_uart_device = {
- .name = "pnx8xxx-uart",
- .id = -1,
- .dev = {
- .dma_mask = &uart_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(32),
- .platform_data = pnx8xxx_ports,
- },
- .num_resources = ARRAY_SIZE(pnx833x_uart_resources),
- .resource = pnx833x_uart_resources,
-};
-
-static u64 ehci_dmamask = DMA_BIT_MASK(32);
-
-static struct resource pnx833x_usb_ehci_resources[] = {
- [0] = {
- .start = PNX833X_USB_PORTS_START,
- .end = PNX833X_USB_PORTS_END,
- .flags = IORESOURCE_MEM,
- },
- [1] = {
- .start = PNX833X_PIC_USB_INT,
- .end = PNX833X_PIC_USB_INT,
- .flags = IORESOURCE_IRQ,
- },
-};
-
-static struct platform_device pnx833x_usb_ehci_device = {
- .name = "pnx833x-ehci",
- .id = -1,
- .dev = {
- .dma_mask = &ehci_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(32),
- },
- .num_resources = ARRAY_SIZE(pnx833x_usb_ehci_resources),
- .resource = pnx833x_usb_ehci_resources,
-};
-
-static u64 ethernet_dmamask = DMA_BIT_MASK(32);
-
-static struct resource pnx833x_ethernet_resources[] = {
- [0] = {
- .start = PNX8335_IP3902_PORTS_START,
- .end = PNX8335_IP3902_PORTS_END,
- .flags = IORESOURCE_MEM,
- },
-#ifdef CONFIG_SOC_PNX8335
- [1] = {
- .start = PNX8335_PIC_ETHERNET_INT,
- .end = PNX8335_PIC_ETHERNET_INT,
- .flags = IORESOURCE_IRQ,
- },
-#endif
-};
-
-static struct platform_device pnx833x_ethernet_device = {
- .name = "ip3902-eth",
- .id = -1,
- .dev = {
- .dma_mask = &ethernet_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(32),
- },
- .num_resources = ARRAY_SIZE(pnx833x_ethernet_resources),
- .resource = pnx833x_ethernet_resources,
-};
-
-static struct resource pnx833x_sata_resources[] = {
- [0] = {
- .start = PNX8335_SATA_PORTS_START,
- .end = PNX8335_SATA_PORTS_END,
- .flags = IORESOURCE_MEM,
- },
- [1] = {
- .start = PNX8335_PIC_SATA_INT,
- .end = PNX8335_PIC_SATA_INT,
- .flags = IORESOURCE_IRQ,
- },
-};
-
-static struct platform_device pnx833x_sata_device = {
- .name = "pnx833x-sata",
- .id = -1,
- .num_resources = ARRAY_SIZE(pnx833x_sata_resources),
- .resource = pnx833x_sata_resources,
-};
-
-static void
-pnx833x_flash_nand_cmd_ctrl(struct nand_chip *this, int cmd, unsigned int ctrl)
-{
- unsigned long nandaddr = (unsigned long)this->legacy.IO_ADDR_W;
-
- if (cmd == NAND_CMD_NONE)
- return;
-
- if (ctrl & NAND_CLE)
- writeb(cmd, (void __iomem *)(nandaddr + PNX8335_NAND_CLE_MASK));
- else
- writeb(cmd, (void __iomem *)(nandaddr + PNX8335_NAND_ALE_MASK));
-}
-
-static struct platform_nand_data pnx833x_flash_nand_data = {
- .chip = {
- .nr_chips = 1,
- .chip_delay = 25,
- },
- .ctrl = {
- .cmd_ctrl = pnx833x_flash_nand_cmd_ctrl
- }
-};
-
-/*
- * Set start to be the correct address (PNX8335_NAND_BASE with no 0xb!!),
- * 12 bytes more seems to be the standard that allows for NAND access.
- */
-static struct resource pnx833x_flash_nand_resource = {
- .start = PNX8335_NAND_BASE,
- .end = PNX8335_NAND_BASE + 12,
- .flags = IORESOURCE_MEM,
-};
-
-static struct platform_device pnx833x_flash_nand = {
- .name = "gen_nand",
- .id = -1,
- .num_resources = 1,
- .resource = &pnx833x_flash_nand_resource,
- .dev = {
- .platform_data = &pnx833x_flash_nand_data,
- },
-};
-
-static struct platform_device *pnx833x_platform_devices[] __initdata = {
- &pnx833x_uart_device,
- &pnx833x_usb_ehci_device,
- &pnx833x_ethernet_device,
- &pnx833x_sata_device,
- &pnx833x_flash_nand,
-};
-
-static int __init pnx833x_platform_init(void)
-{
- return platform_add_devices(pnx833x_platform_devices,
- ARRAY_SIZE(pnx833x_platform_devices));
-}
-
-arch_initcall(pnx833x_platform_init);
diff --git a/arch/mips/pnx833x/common/prom.c b/arch/mips/pnx833x/common/prom.c
deleted file mode 100644
index 12733ef25782..000000000000
--- a/arch/mips/pnx833x/common/prom.c
+++ /dev/null
@@ -1,51 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * prom.c:
- *
- * Copyright 2008 NXP Semiconductors
- * Chris Steel <chris.steel@nxp.com>
- * Daniel Laird <daniel.j.laird@nxp.com>
- *
- * Based on software written by:
- * Nikita Youshchenko <yoush@debian.org>, based on PNX8550 code.
- */
-#include <linux/init.h>
-#include <asm/bootinfo.h>
-#include <linux/string.h>
-
-void __init prom_init_cmdline(void)
-{
- int argc = fw_arg0;
- char **argv = (char **)fw_arg1;
- char *c = &(arcs_cmdline[0]);
- int i;
-
- for (i = 1; i < argc; i++) {
- strcpy(c, argv[i]);
- c += strlen(argv[i]);
- if (i < argc-1)
- *c++ = ' ';
- }
- *c = 0;
-}
-
-char __init *prom_getenv(char *envname)
-{
- extern char **prom_envp;
- char **env = prom_envp;
- int i;
-
- i = strlen(envname);
-
- while (*env) {
- if (strncmp(envname, *env, i) == 0 && *(*env+i) == '=')
- return *env + i + 1;
- env++;
- }
-
- return 0;
-}
-
-void __init prom_free_prom_memory(void)
-{
-}
diff --git a/arch/mips/pnx833x/common/reset.c b/arch/mips/pnx833x/common/reset.c
deleted file mode 100644
index b48e83bf912b..000000000000
--- a/arch/mips/pnx833x/common/reset.c
+++ /dev/null
@@ -1,31 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * reset.c: reset support for PNX833X.
- *
- * Copyright 2008 NXP Semiconductors
- * Chris Steel <chris.steel@nxp.com>
- * Daniel Laird <daniel.j.laird@nxp.com>
- *
- * Based on software written by:
- * Nikita Youshchenko <yoush@debian.org>, based on PNX8550 code.
- */
-#include <linux/reboot.h>
-#include <pnx833x.h>
-
-void pnx833x_machine_restart(char *command)
-{
- PNX833X_RESET_CONTROL_2 = 0;
- PNX833X_RESET_CONTROL = 0;
-}
-
-void pnx833x_machine_halt(void)
-{
- while (1)
- __asm__ __volatile__ ("wait");
-
-}
-
-void pnx833x_machine_power_off(void)
-{
- pnx833x_machine_halt();
-}
diff --git a/arch/mips/pnx833x/common/setup.c b/arch/mips/pnx833x/common/setup.c
deleted file mode 100644
index abf68d92ce4a..000000000000
--- a/arch/mips/pnx833x/common/setup.c
+++ /dev/null
@@ -1,48 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * setup.c: Setup PNX833X Soc.
- *
- * Copyright 2008 NXP Semiconductors
- * Chris Steel <chris.steel@nxp.com>
- * Daniel Laird <daniel.j.laird@nxp.com>
- *
- * Based on software written by:
- * Nikita Youshchenko <yoush@debian.org>, based on PNX8550 code.
- */
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/io.h>
-#include <linux/pci.h>
-#include <asm/reboot.h>
-#include <pnx833x.h>
-#include <gpio.h>
-
-extern void pnx833x_board_setup(void);
-extern void pnx833x_machine_restart(char *);
-extern void pnx833x_machine_halt(void);
-extern void pnx833x_machine_power_off(void);
-
-int __init plat_mem_setup(void)
-{
- /* set mips clock to 320MHz */
-#if defined(CONFIG_SOC_PNX8335)
- PNX8335_WRITEFIELD(0x17, CLOCK_PLL_CPU_CTL, FREQ);
-#endif
- pnx833x_gpio_init(); /* so it will be ready in board_setup() */
-
- pnx833x_board_setup();
-
- _machine_restart = pnx833x_machine_restart;
- _machine_halt = pnx833x_machine_halt;
- pm_power_off = pnx833x_machine_power_off;
-
- /* IO/MEM resources. */
- set_io_port_base(KSEG1);
- ioport_resource.start = 0;
- ioport_resource.end = ~0;
- iomem_resource.start = 0;
- iomem_resource.end = ~0;
-
- return 0;
-}
diff --git a/arch/mips/pnx833x/stb22x/Makefile b/arch/mips/pnx833x/stb22x/Makefile
deleted file mode 100644
index 7c5ddf36b735..000000000000
--- a/arch/mips/pnx833x/stb22x/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-obj-y := board.o
diff --git a/arch/mips/pnx833x/stb22x/board.c b/arch/mips/pnx833x/stb22x/board.c
deleted file mode 100644
index 93d8e7b73427..000000000000
--- a/arch/mips/pnx833x/stb22x/board.c
+++ /dev/null
@@ -1,120 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * board.c: STB225 board support.
- *
- * Copyright 2008 NXP Semiconductors
- * Chris Steel <chris.steel@nxp.com>
- * Daniel Laird <daniel.j.laird@nxp.com>
- *
- * Based on software written by:
- * Nikita Youshchenko <yoush@debian.org>, based on PNX8550 code.
- */
-#include <linux/init.h>
-#include <asm/bootinfo.h>
-#include <linux/mm.h>
-#include <pnx833x.h>
-#include <gpio.h>
-
-/* endianess twiddlers */
-#define PNX8335_DEBUG0 0x4400
-#define PNX8335_DEBUG1 0x4404
-#define PNX8335_DEBUG2 0x4408
-#define PNX8335_DEBUG3 0x440c
-#define PNX8335_DEBUG4 0x4410
-#define PNX8335_DEBUG5 0x4414
-#define PNX8335_DEBUG6 0x4418
-#define PNX8335_DEBUG7 0x441c
-
-int prom_argc;
-char **prom_argv, **prom_envp;
-
-extern void prom_init_cmdline(void);
-extern char *prom_getenv(char *envname);
-
-const char *get_system_type(void)
-{
- return "NXP STB22x";
-}
-
-static inline unsigned long env_or_default(char *env, unsigned long dfl)
-{
- char *str = prom_getenv(env);
- return str ? simple_strtol(str, 0, 0) : dfl;
-}
-
-void __init prom_init(void)
-{
- unsigned long memsize;
-
- prom_argc = fw_arg0;
- prom_argv = (char **)fw_arg1;
- prom_envp = (char **)fw_arg2;
-
- prom_init_cmdline();
-
- memsize = env_or_default("memsize", 0x02000000);
- add_memory_region(0, memsize, BOOT_MEM_RAM);
-}
-
-void __init pnx833x_board_setup(void)
-{
- pnx833x_gpio_select_function_alt(4);
- pnx833x_gpio_select_output(4);
- pnx833x_gpio_select_function_alt(5);
- pnx833x_gpio_select_input(5);
- pnx833x_gpio_select_function_alt(6);
- pnx833x_gpio_select_input(6);
- pnx833x_gpio_select_function_alt(7);
- pnx833x_gpio_select_output(7);
-
- pnx833x_gpio_select_function_alt(25);
- pnx833x_gpio_select_function_alt(26);
-
- pnx833x_gpio_select_function_alt(27);
- pnx833x_gpio_select_function_alt(28);
- pnx833x_gpio_select_function_alt(29);
- pnx833x_gpio_select_function_alt(30);
- pnx833x_gpio_select_function_alt(31);
- pnx833x_gpio_select_function_alt(32);
- pnx833x_gpio_select_function_alt(33);
-
-#if IS_ENABLED(CONFIG_MTD_NAND_PLATFORM)
- /* Setup MIU for NAND access on CS0...
- *
- * (it seems that we must also configure CS1 for reliable operation,
- * otherwise the first read ID command will fail if it's read as 4 bytes
- * but pass if it's read as 1 word.)
- */
-
- /* Setup MIU CS0 & CS1 timing */
- PNX833X_MIU_SEL0 = 0;
- PNX833X_MIU_SEL1 = 0;
- PNX833X_MIU_SEL0_TIMING = 0x50003081;
- PNX833X_MIU_SEL1_TIMING = 0x50003081;
-
- /* Setup GPIO 00 for use as MIU CS1 (CS0 is not multiplexed, so does not need this) */
- pnx833x_gpio_select_function_alt(0);
-
- /* Setup GPIO 04 to input NAND read/busy signal */
- pnx833x_gpio_select_function_io(4);
- pnx833x_gpio_select_input(4);
-
- /* Setup GPIO 05 to disable NAND write protect */
- pnx833x_gpio_select_function_io(5);
- pnx833x_gpio_select_output(5);
- pnx833x_gpio_write(1, 5);
-
-#elif IS_ENABLED(CONFIG_MTD_CFI)
-
- /* Set up MIU for 16-bit NOR access on CS0 and CS1... */
-
- /* Setup MIU CS0 & CS1 timing */
- PNX833X_MIU_SEL0 = 1;
- PNX833X_MIU_SEL1 = 1;
- PNX833X_MIU_SEL0_TIMING = 0x6A08D082;
- PNX833X_MIU_SEL1_TIMING = 0x6A08D082;
-
- /* Setup GPIO 00 for use as MIU CS1 (CS0 is not multiplexed, so does not need this) */
- pnx833x_gpio_select_function_alt(0);
-#endif
-}
diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c
index 90c6d4a11c5d..cbae9d23ab7f 100644
--- a/arch/mips/ralink/of.c
+++ b/arch/mips/ralink/of.c
@@ -84,8 +84,7 @@ void __init plat_mem_setup(void)
if (memory_dtb)
of_scan_flat_dt(early_init_dt_scan_memory, NULL);
else if (soc_info.mem_size)
- add_memory_region(soc_info.mem_base, soc_info.mem_size * SZ_1M,
- BOOT_MEM_RAM);
+ memblock_add(soc_info.mem_base, soc_info.mem_size * SZ_1M);
else
detect_memory_region(soc_info.mem_base,
soc_info.mem_size_min * SZ_1M,
diff --git a/arch/mips/rb532/prom.c b/arch/mips/rb532/prom.c
index 303cc3dc1749..a9d1f2019dc3 100644
--- a/arch/mips/rb532/prom.c
+++ b/arch/mips/rb532/prom.c
@@ -126,5 +126,5 @@ void __init prom_init(void)
/* give all RAM to boot allocator,
* except for the first 0x400 and the last 0x200 bytes */
- add_memory_region(ddrbase + 0x400, memsize - 0x600, BOOT_MEM_RAM);
+ memblock_add(ddrbase + 0x400, memsize - 0x600);
}
diff --git a/arch/mips/sgi-ip30/ip30-common.h b/arch/mips/sgi-ip30/ip30-common.h
index d2bcaee712f3..7b5db24b6279 100644
--- a/arch/mips/sgi-ip30/ip30-common.h
+++ b/arch/mips/sgi-ip30/ip30-common.h
@@ -3,6 +3,20 @@
#ifndef __IP30_COMMON_H
#define __IP30_COMMON_H
+/*
+ * Power Switch is wired via BaseIO BRIDGE slot #6.
+ *
+ * ACFail is wired via BaseIO BRIDGE slot #7.
+ */
+#define IP30_POWER_IRQ HEART_L2_INT_POWER_BTN
+
+#define IP30_HEART_L0_IRQ (MIPS_CPU_IRQ_BASE + 2)
+#define IP30_HEART_L1_IRQ (MIPS_CPU_IRQ_BASE + 3)
+#define IP30_HEART_L2_IRQ (MIPS_CPU_IRQ_BASE + 4)
+#define IP30_HEART_TIMER_IRQ (MIPS_CPU_IRQ_BASE + 5)
+#define IP30_HEART_ERR_IRQ (MIPS_CPU_IRQ_BASE + 6)
+
+extern void __init ip30_install_ipi(void);
extern struct plat_smp_ops ip30_smp_ops;
extern void __init ip30_per_cpu_init(void);
diff --git a/arch/mips/sgi-ip30/ip30-irq.c b/arch/mips/sgi-ip30/ip30-irq.c
index c2ffcb920250..e8374e4c705b 100644
--- a/arch/mips/sgi-ip30/ip30-irq.c
+++ b/arch/mips/sgi-ip30/ip30-irq.c
@@ -14,6 +14,8 @@
#include <asm/irq_cpu.h>
#include <asm/sgi/heart.h>
+#include "ip30-common.h"
+
struct heart_irq_data {
u64 *irq_mask;
int cpu;
diff --git a/arch/mips/sgi-ip32/ip32-memory.c b/arch/mips/sgi-ip32/ip32-memory.c
index 62b956cc2d1d..0f53fed39da6 100644
--- a/arch/mips/sgi-ip32/ip32-memory.c
+++ b/arch/mips/sgi-ip32/ip32-memory.c
@@ -9,6 +9,7 @@
#include <linux/types.h>
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/memblock.h>
#include <linux/mm.h>
#include <asm/ip32/crime.h>
@@ -36,7 +37,7 @@ void __init prom_meminit(void)
printk("CRIME MC: bank %u base 0x%016Lx size %LuMiB\n",
bank, base, size >> 20);
- add_memory_region(base, size, BOOT_MEM_RAM);
+ memblock_add(base, size);
}
}
diff --git a/arch/mips/sgi-ip32/ip32-setup.c b/arch/mips/sgi-ip32/ip32-setup.c
index 3abd1465ec02..8019dae1721a 100644
--- a/arch/mips/sgi-ip32/ip32-setup.c
+++ b/arch/mips/sgi-ip32/ip32-setup.c
@@ -12,12 +12,10 @@
#include <linux/console.h>
#include <linux/init.h>
#include <linux/interrupt.h>
-#include <linux/mc146818rtc.h>
#include <linux/param.h>
#include <linux/sched.h>
#include <asm/bootinfo.h>
-#include <asm/mc146818-time.h>
#include <asm/mipsregs.h>
#include <asm/mmu_context.h>
#include <asm/sgialib.h>
diff --git a/arch/mips/sibyte/common/cfe.c b/arch/mips/sibyte/common/cfe.c
index cbf5939ed53a..89f7fca45152 100644
--- a/arch/mips/sibyte/common/cfe.c
+++ b/arch/mips/sibyte/common/cfe.c
@@ -114,16 +114,14 @@ static __init void prom_meminit(void)
if (initrd_start) {
if ((initrd_pstart > addr) &&
(initrd_pstart < (addr + size))) {
- add_memory_region(addr,
- initrd_pstart - addr,
- BOOT_MEM_RAM);
+ memblock_add(addr,
+ initrd_pstart - addr);
rd_flag = 1;
}
if ((initrd_pend > addr) &&
(initrd_pend < (addr + size))) {
- add_memory_region(initrd_pend,
- (addr + size) - initrd_pend,
- BOOT_MEM_RAM);
+ memblock_add(initrd_pend,
+ (addr + size) - initrd_pend);
rd_flag = 1;
}
}
@@ -142,7 +140,7 @@ static __init void prom_meminit(void)
*/
if (size > 512)
size -= 512;
- add_memory_region(addr, size, BOOT_MEM_RAM);
+ memblock_add(addr, size);
}
board_mem_region_addrs[board_mem_region_count] = addr;
board_mem_region_sizes[board_mem_region_count] = size;
@@ -158,8 +156,8 @@ static __init void prom_meminit(void)
}
#ifdef CONFIG_BLK_DEV_INITRD
if (initrd_start) {
- add_memory_region(initrd_pstart, initrd_pend - initrd_pstart,
- BOOT_MEM_RESERVED);
+ memblock_add(initrd_pstart, initrd_pend - initrd_pstart);
+ memblock_reserve(initrd_pstart, initrd_pend - initrd_pstart);
}
#endif
}
diff --git a/arch/mips/txx9/generic/setup_tx4939.c b/arch/mips/txx9/generic/setup_tx4939.c
index 360c388f4c82..bf8a3cdababf 100644
--- a/arch/mips/txx9/generic/setup_tx4939.c
+++ b/arch/mips/txx9/generic/setup_tx4939.c
@@ -22,7 +22,6 @@
#include <linux/mtd/physmap.h>
#include <linux/platform_device.h>
#include <linux/platform_data/txx9/ndfmc.h>
-#include <asm/bootinfo.h>
#include <asm/reboot.h>
#include <asm/traps.h>
#include <asm/txx9irq.h>
@@ -94,22 +93,6 @@ static struct resource tx4939_sdram_resource[4];
static struct resource tx4939_sram_resource;
#define TX4939_SRAM_SIZE 0x800
-void __init tx4939_add_memory_regions(void)
-{
- int i;
- unsigned long start, size;
- u64 win;
-
- for (i = 0; i < 4; i++) {
- if (!((__u32)____raw_readq(&tx4939_ddrcptr->winen) & (1 << i)))
- continue;
- win = ____raw_readq(&tx4939_ddrcptr->win[i]);
- start = (unsigned long)(win >> 48);
- size = (((unsigned long)(win >> 32) & 0xffff) + 1) - start;
- add_memory_region(start << 20, size << 20, BOOT_MEM_RAM);
- }
-}
-
void __init tx4939_setup(void)
{
int i;
diff --git a/arch/mips/txx9/jmr3927/prom.c b/arch/mips/txx9/jmr3927/prom.c
index 68a96473c134..53c68de54d30 100644
--- a/arch/mips/txx9/jmr3927/prom.c
+++ b/arch/mips/txx9/jmr3927/prom.c
@@ -37,7 +37,7 @@
*/
#include <linux/init.h>
#include <linux/kernel.h>
-#include <asm/bootinfo.h>
+#include <linux/memblock.h>
#include <asm/txx9/generic.h>
#include <asm/txx9/jmr3927.h>
@@ -47,6 +47,6 @@ void __init jmr3927_prom_init(void)
if ((tx3927_ccfgptr->ccfg & TX3927_CCFG_TLBOFF) == 0)
pr_err("TX3927 TLB off\n");
- add_memory_region(0, JMR3927_SDRAM_SIZE, BOOT_MEM_RAM);
+ memblock_add(0, JMR3927_SDRAM_SIZE);
txx9_sio_putchar_init(TX3927_SIO_REG(1));
}
diff --git a/arch/mips/txx9/rbtx4927/prom.c b/arch/mips/txx9/rbtx4927/prom.c
index fe6d0b54763f..9b4acff826eb 100644
--- a/arch/mips/txx9/rbtx4927/prom.c
+++ b/arch/mips/txx9/rbtx4927/prom.c
@@ -29,13 +29,14 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+
#include <linux/init.h>
-#include <asm/bootinfo.h>
+#include <linux/memblock.h>
#include <asm/txx9/generic.h>
#include <asm/txx9/rbtx4927.h>
void __init rbtx4927_prom_init(void)
{
- add_memory_region(0, tx4927_get_mem_size(), BOOT_MEM_RAM);
+ memblock_add(0, tx4927_get_mem_size());
txx9_sio_putchar_init(TX4927_SIO_REG(0) & 0xfffffffffULL);
}
diff --git a/arch/mips/txx9/rbtx4938/prom.c b/arch/mips/txx9/rbtx4938/prom.c
index 2b36a2ee744c..0de84716a428 100644
--- a/arch/mips/txx9/rbtx4938/prom.c
+++ b/arch/mips/txx9/rbtx4938/prom.c
@@ -12,12 +12,11 @@
#include <linux/init.h>
#include <linux/memblock.h>
-#include <asm/bootinfo.h>
#include <asm/txx9/generic.h>
#include <asm/txx9/rbtx4938.h>
void __init rbtx4938_prom_init(void)
{
- add_memory_region(0, tx4938_get_mem_size(), BOOT_MEM_RAM);
+ memblock_add(0, tx4938_get_mem_size());
txx9_sio_putchar_init(TX4938_SIO_REG(0) & 0xfffffffffULL);
}
diff --git a/arch/mips/txx9/rbtx4939/prom.c b/arch/mips/txx9/rbtx4939/prom.c
index bd277ecb4ad6..ba25ba1bd2ec 100644
--- a/arch/mips/txx9/rbtx4939/prom.c
+++ b/arch/mips/txx9/rbtx4939/prom.c
@@ -7,11 +7,23 @@
*/
#include <linux/init.h>
+#include <linux/memblock.h>
#include <asm/txx9/generic.h>
#include <asm/txx9/rbtx4939.h>
void __init rbtx4939_prom_init(void)
{
- tx4939_add_memory_regions();
+ unsigned long start, size;
+ u64 win;
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ if (!((__u32)____raw_readq(&tx4939_ddrcptr->winen) & (1 << i)))
+ continue;
+ win = ____raw_readq(&tx4939_ddrcptr->win[i]);
+ start = (unsigned long)(win >> 48);
+ size = (((unsigned long)(win >> 32) & 0xffff) + 1) - start;
+ memblock_add(start << 20, size << 20);
+ }
txx9_sio_putchar_init(TX4939_SIO_REG(0) & 0xfffffffffULL);
}
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index ae3dab371f6f..38c63e5404bc 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -437,3 +437,4 @@
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
+440 common process_madvise sys_process_madvise
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8a4428499138..20dfdd30b9af 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -59,7 +59,10 @@ config HAVE_SETUP_PER_CPU_AREA
def_bool PPC64
config NEED_PER_CPU_EMBED_FIRST_CHUNK
- def_bool PPC64
+ def_bool y if PPC64
+
+config NEED_PER_CPU_PAGE_FIRST_CHUNK
+ def_bool y if PPC64
config NR_IRQS
int "Number of virtual interrupt numbers"
@@ -149,6 +152,7 @@ config PPC
select ARCH_USE_QUEUED_RWLOCKS if PPC_QUEUED_SPINLOCKS
select ARCH_USE_QUEUED_SPINLOCKS if PPC_QUEUED_SPINLOCKS
select ARCH_WANT_IPC_PARSE_VERSION
+ select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
select ARCH_WEAK_RELEASE_ACQUIRE
select BINFMT_ELF
select BUILDTIME_TABLE_SORT
@@ -965,7 +969,7 @@ config PPC_MEM_KEYS
config PPC_SECURE_BOOT
prompt "Enable secure boot support"
bool
- depends on PPC_POWERNV
+ depends on PPC_POWERNV || PPC_PSERIES
depends on IMA_ARCH_POLICY
imply IMA_SECURE_AND_OR_TRUSTED_BOOT
help
@@ -985,6 +989,19 @@ config PPC_SECVAR_SYSFS
read/write operations on these variables. Say Y if you have
secure boot enabled and want to expose variables to userspace.
+config PPC_RTAS_FILTER
+ bool "Enable filtering of RTAS syscalls"
+ default y
+ depends on PPC_RTAS
+ help
+ The RTAS syscall API has security issues that could be used to
+ compromise system integrity. This option enforces restrictions on the
+ RTAS calls and arguments passed by userspace programs to mitigate
+ these issues.
+
+ Say Y unless you know what you are doing and the filter is causing
+ problems for you.
+
endmenu
config ISA_DMA_API
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 3e8da9cf2eb9..c4f9dbd12577 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -264,7 +264,8 @@ KBUILD_CFLAGS += $(cpu-as-y)
KBUILD_AFLAGS += $(aflags-y)
KBUILD_CFLAGS += $(cflags-y)
-head-y := arch/powerpc/kernel/head_$(BITS).o
+head-$(CONFIG_PPC64) := arch/powerpc/kernel/head_64.o
+head-$(CONFIG_PPC_BOOK3S_32) := arch/powerpc/kernel/head_book3s_32.o
head-$(CONFIG_PPC_8xx) := arch/powerpc/kernel/head_8xx.o
head-$(CONFIG_40x) := arch/powerpc/kernel/head_40x.o
head-$(CONFIG_44x) := arch/powerpc/kernel/head_44x.o
diff --git a/arch/powerpc/Makefile.postlink b/arch/powerpc/Makefile.postlink
index 2268396ff4bb..a6c77f4d32b2 100644
--- a/arch/powerpc/Makefile.postlink
+++ b/arch/powerpc/Makefile.postlink
@@ -18,7 +18,7 @@ quiet_cmd_relocs_check = CHKREL $@
ifdef CONFIG_PPC_BOOK3S_64
cmd_relocs_check = \
$(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@" ; \
- $(BASH) $(srctree)/arch/powerpc/tools/unrel_branch_check.sh "$(OBJDUMP)" "$@"
+ $(BASH) $(srctree)/arch/powerpc/tools/unrel_branch_check.sh "$(OBJDUMP)" "$(NM)" "$@"
else
cmd_relocs_check = \
$(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@"
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index b88fd27a45f0..f8ce6d2dde7b 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -7,7 +7,7 @@
# Based on coffboot by Paul Mackerras
# Simplified for ppc64 by Todd Inglett
#
-# NOTE: this code is built for 32 bit in ELF32 format even though
+# NOTE: this code may be built for 32 bit in ELF32 format even though
# it packages a 64 bit kernel. We do this to simplify the
# bootloader and increase compatibility with OpenFirmware.
#
diff --git a/arch/powerpc/boot/dts/fsl/t1024rdb.dts b/arch/powerpc/boot/dts/fsl/t1024rdb.dts
index 73a645324bc1..dbcd31cc35dc 100644
--- a/arch/powerpc/boot/dts/fsl/t1024rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t1024rdb.dts
@@ -161,7 +161,6 @@
rtc@68 {
compatible = "dallas,ds1339";
reg = <0x68>;
- interrupts = <0x1 0x1 0 0>;
};
};
diff --git a/arch/powerpc/boot/dts/fsl/t4240rdb.dts b/arch/powerpc/boot/dts/fsl/t4240rdb.dts
index a56a705d41f7..145896f2eef6 100644
--- a/arch/powerpc/boot/dts/fsl/t4240rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t4240rdb.dts
@@ -144,7 +144,6 @@
rtc@68 {
compatible = "dallas,ds1374";
reg = <0x68>;
- interrupts = <0x1 0x1 0 0>;
};
};
diff --git a/arch/powerpc/boot/util.S b/arch/powerpc/boot/util.S
index f11f0589a669..d03cdb7606dc 100644
--- a/arch/powerpc/boot/util.S
+++ b/arch/powerpc/boot/util.S
@@ -18,7 +18,7 @@
.text
-/* udelay (on non-601 processors) needs to know the period of the
+/* udelay needs to know the period of the
* timebase in nanoseconds. This used to be hardcoded to be 60ns
* (period of 66MHz/4). Now a variable is used that is initialized to
* 60 for backward compatibility, but it can be overridden as necessary
@@ -37,19 +37,6 @@ timebase_period_ns:
*/
.globl udelay
udelay:
- mfspr r4,SPRN_PVR
- srwi r4,r4,16
- cmpwi 0,r4,1 /* 601 ? */
- bne .Ludelay_not_601
-00: li r0,86 /* Instructions / microsecond? */
- mtctr r0
-10: addi r0,r0,0 /* NOP */
- bdnz 10b
- subic. r3,r3,1
- bne 00b
- blr
-
-.Ludelay_not_601:
mulli r4,r3,1000 /* nanoseconds */
/* Change r4 to be the number of ticks using:
* (nanoseconds + (timebase_period_ns - 1 )) / timebase_period_ns
diff --git a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
index 0683d8c292a8..cea72e85ed26 100644
--- a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
+++ b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
@@ -29,9 +29,9 @@ CONFIG_SYN_COOKIES=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_IDE=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_VIA82CXXX=y
+CONFIG_ATA=y
+CONFIG_ATA_GENERIC=y
+CONFIG_PATA_VIA=y
CONFIG_NETDEVICES=y
CONFIG_GIANFAR=y
CONFIG_E1000=y
diff --git a/arch/powerpc/configs/85xx/tqm8540_defconfig b/arch/powerpc/configs/85xx/tqm8540_defconfig
index 98982a0e82d8..bbf040aa1f9a 100644
--- a/arch/powerpc/configs/85xx/tqm8540_defconfig
+++ b/arch/powerpc/configs/85xx/tqm8540_defconfig
@@ -30,9 +30,9 @@ CONFIG_MTD_CFI_AMDSTD=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_IDE=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_VIA82CXXX=y
+CONFIG_ATA=y
+CONFIG_ATA_GENERIC=y
+CONFIG_PATA_VIA=y
CONFIG_NETDEVICES=y
CONFIG_GIANFAR=y
CONFIG_E100=y
diff --git a/arch/powerpc/configs/85xx/tqm8541_defconfig b/arch/powerpc/configs/85xx/tqm8541_defconfig
index a6e21db1dafe..523ad8dcfd9d 100644
--- a/arch/powerpc/configs/85xx/tqm8541_defconfig
+++ b/arch/powerpc/configs/85xx/tqm8541_defconfig
@@ -30,9 +30,9 @@ CONFIG_MTD_CFI_AMDSTD=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_IDE=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_VIA82CXXX=y
+CONFIG_ATA=y
+CONFIG_ATA_GENERIC=y
+CONFIG_PATA_VIA=y
CONFIG_NETDEVICES=y
CONFIG_GIANFAR=y
CONFIG_E100=y
diff --git a/arch/powerpc/configs/85xx/tqm8555_defconfig b/arch/powerpc/configs/85xx/tqm8555_defconfig
index ca1de3979474..0032ce1e8c9c 100644
--- a/arch/powerpc/configs/85xx/tqm8555_defconfig
+++ b/arch/powerpc/configs/85xx/tqm8555_defconfig
@@ -30,9 +30,9 @@ CONFIG_MTD_CFI_AMDSTD=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_IDE=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_VIA82CXXX=y
+CONFIG_ATA=y
+CONFIG_ATA_GENERIC=y
+CONFIG_PATA_VIA=y
CONFIG_NETDEVICES=y
CONFIG_GIANFAR=y
CONFIG_E100=y
diff --git a/arch/powerpc/configs/85xx/tqm8560_defconfig b/arch/powerpc/configs/85xx/tqm8560_defconfig
index ca3b8c8ef30f..a80b971f7d6e 100644
--- a/arch/powerpc/configs/85xx/tqm8560_defconfig
+++ b/arch/powerpc/configs/85xx/tqm8560_defconfig
@@ -30,9 +30,9 @@ CONFIG_MTD_CFI_AMDSTD=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_IDE=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_VIA82CXXX=y
+CONFIG_ATA=y
+CONFIG_ATA_GENERIC=y
+CONFIG_PATA_VIA=y
CONFIG_NETDEVICES=y
CONFIG_GIANFAR=y
CONFIG_E100=y
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index de14b1a34d56..d0b832cbbec8 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -67,6 +67,7 @@ void single_step_exception(struct pt_regs *regs);
void program_check_exception(struct pt_regs *regs);
void alignment_exception(struct pt_regs *regs);
void StackOverflow(struct pt_regs *regs);
+void stack_overflow_exception(struct pt_regs *regs);
void kernel_fp_unavailable_exception(struct pt_regs *regs);
void altivec_unavailable_exception(struct pt_regs *regs);
void vsx_unavailable_exception(struct pt_regs *regs);
@@ -144,7 +145,9 @@ void _kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr);
void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr);
/* Patch sites */
-extern s32 patch__call_flush_branch_caches;
+extern s32 patch__call_flush_branch_caches1;
+extern s32 patch__call_flush_branch_caches2;
+extern s32 patch__call_flush_branch_caches3;
extern s32 patch__flush_count_cache_return;
extern s32 patch__flush_link_stack_return;
extern s32 patch__call_kvm_flush_link_stack;
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 082b98808701..b6ac4f86c87b 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -13,20 +13,24 @@
*/
#define MAX_EA_BITS_PER_CONTEXT 46
-#define REGION_SHIFT (MAX_EA_BITS_PER_CONTEXT - 2)
/*
- * Our page table limit us to 64TB. Hence for the kernel mapping,
- * each MAP area is limited to 16 TB.
- * The four map areas are: linear mapping, vmap, IO and vmemmap
+ * Our page table limit us to 64TB. For 64TB physical memory, we only need 64GB
+ * of vmemmap space. To better support sparse memory layout, we use 61TB
+ * linear map range, 1TB of vmalloc, 1TB of I/O and 1TB of vmememmap.
*/
+#define REGION_SHIFT (40)
#define H_KERN_MAP_SIZE (ASM_CONST(1) << REGION_SHIFT)
/*
- * Define the address range of the kernel non-linear virtual area
- * 16TB
+ * Limits the linear mapping range
*/
-#define H_KERN_VIRT_START ASM_CONST(0xc000100000000000)
+#define H_MAX_PHYSMEM_BITS 46
+
+/*
+ * Define the address range of the kernel non-linear virtual area (61TB)
+ */
+#define H_KERN_VIRT_START ASM_CONST(0xc0003d0000000000)
#ifndef __ASSEMBLY__
#define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index f20de1149ebe..338e62fbea0b 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -7,6 +7,19 @@
#define H_PUD_INDEX_SIZE 10 // size: 8B << 10 = 8KB, maps 2^10 x 16GB = 16TB
#define H_PGD_INDEX_SIZE 8 // size: 8B << 8 = 2KB, maps 2^8 x 16TB = 4PB
+/*
+ * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS
+ * if we increase SECTIONS_WIDTH we will not store node details in page->flags and
+ * page_to_nid does a page->section->node lookup
+ * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce
+ * memory requirements with large number of sections.
+ * 51 bits is the max physical real address on POWER9
+ */
+#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME)
+#define H_MAX_PHYSMEM_BITS 51
+#else
+#define H_MAX_PHYSMEM_BITS 46
+#endif
/*
* Each context is 512TB size. SLB miss for first context/default context
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 93d18da5e7ec..683a9c7d1b03 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -577,8 +577,8 @@ extern void slb_set_size(u16 size);
* For vmalloc and memmap, we use just one context with 512TB. With 64 byte
* struct page size, we need ony 32 TB in memmap for 2PB (51 bits (MAX_PHYSMEM_BITS)).
*/
-#if (MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT)
-#define MAX_KERNEL_CTX_CNT (1UL << (MAX_PHYSMEM_BITS - MAX_EA_BITS_PER_CONTEXT))
+#if (H_MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT)
+#define MAX_KERNEL_CTX_CNT (1UL << (H_MAX_PHYSMEM_BITS - MAX_EA_BITS_PER_CONTEXT))
#else
#define MAX_KERNEL_CTX_CNT 1
#endif
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index b392384a3b15..e0b52940e43c 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -27,21 +27,6 @@ struct mmu_psize_def {
extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
#endif /* __ASSEMBLY__ */
-/*
- * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS
- * if we increase SECTIONS_WIDTH we will not store node details in page->flags and
- * page_to_nid does a page->section->node lookup
- * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce
- * memory requirements with large number of sections.
- * 51 bits is the max physical real address on POWER9
- */
-#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \
- defined(CONFIG_PPC_64K_PAGES)
-#define MAX_PHYSMEM_BITS 51
-#else
-#define MAX_PHYSMEM_BITS 46
-#endif
-
/* 64-bit classic hash table MMU */
#include <asm/book3s/64/mmu-hash.h>
@@ -85,7 +70,7 @@ extern unsigned int mmu_base_pid;
/*
* memory block size used with radix translation.
*/
-extern unsigned int __ro_after_init radix_mem_block_size;
+extern unsigned long __ro_after_init radix_mem_block_size;
#define PRTB_SIZE_SHIFT (mmu_pid_bits + 4)
#define PRTB_ENTRIES (1ul << mmu_pid_bits)
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 495fc0ccb453..cd3feeac6e87 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -294,6 +294,13 @@ extern unsigned long pci_io_base;
#include <asm/book3s/64/hash.h>
#include <asm/book3s/64/radix.h>
+#if H_MAX_PHYSMEM_BITS > R_MAX_PHYSMEM_BITS
+#define MAX_PHYSMEM_BITS H_MAX_PHYSMEM_BITS
+#else
+#define MAX_PHYSMEM_BITS R_MAX_PHYSMEM_BITS
+#endif
+
+
#ifdef CONFIG_PPC_64K_PAGES
#include <asm/book3s/64/pgtable-64k.h>
#else
@@ -615,7 +622,7 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
VM_BUG_ON(pfn >> (64 - PAGE_SHIFT));
VM_BUG_ON((pfn << PAGE_SHIFT) & ~PTE_RPN_MASK);
- return __pte(((pte_basic_t)pfn << PAGE_SHIFT) | pgprot_val(pgprot));
+ return __pte(((pte_basic_t)pfn << PAGE_SHIFT) | pgprot_val(pgprot) | _PAGE_PTE);
}
static inline unsigned long pte_pfn(pte_t pte)
@@ -651,11 +658,6 @@ static inline pte_t pte_mkexec(pte_t pte)
return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_EXEC));
}
-static inline pte_t pte_mkpte(pte_t pte)
-{
- return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PTE));
-}
-
static inline pte_t pte_mkwrite(pte_t pte)
{
/*
@@ -819,6 +821,14 @@ static inline int pte_none(pte_t pte)
static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, int percpu)
{
+
+ VM_WARN_ON(!(pte_raw(pte) & cpu_to_be64(_PAGE_PTE)));
+ /*
+ * Keep the _PAGE_PTE added till we are sure we handle _PAGE_PTE
+ * in all the callers.
+ */
+ pte = __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PTE));
+
if (radix_enabled())
return radix__set_pte_at(mm, addr, ptep, pte, percpu);
return hash__set_pte_at(mm, addr, ptep, pte, percpu);
@@ -866,6 +876,13 @@ static inline bool pte_ci(pte_t pte)
static inline void pmd_clear(pmd_t *pmdp)
{
+ if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled()) {
+ /*
+ * Don't use this if we can possibly have a hash page table
+ * entry mapping this.
+ */
+ WARN_ON((pmd_val(*pmdp) & (H_PAGE_HASHPTE | _PAGE_PTE)) == (H_PAGE_HASHPTE | _PAGE_PTE));
+ }
*pmdp = __pmd(0);
}
@@ -914,6 +931,13 @@ static inline int pmd_bad(pmd_t pmd)
static inline void pud_clear(pud_t *pudp)
{
+ if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled()) {
+ /*
+ * Don't use this if we can possibly have a hash page table
+ * entry mapping this.
+ */
+ WARN_ON((pud_val(*pudp) & (H_PAGE_HASHPTE | _PAGE_PTE)) == (H_PAGE_HASHPTE | _PAGE_PTE));
+ }
*pudp = __pud(0);
}
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 0cba794c4fb8..c7813dc628fc 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -91,6 +91,22 @@
* +------------------------------+ Kernel linear (0xc.....)
*/
+
+/*
+ * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS
+ * if we increase SECTIONS_WIDTH we will not store node details in page->flags and
+ * page_to_nid does a page->section->node lookup
+ * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce
+ * memory requirements with large number of sections.
+ * 51 bits is the max physical real address on POWER9
+ */
+
+#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME)
+#define R_MAX_PHYSMEM_BITS 51
+#else
+#define R_MAX_PHYSMEM_BITS 46
+#endif
+
#define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000)
/*
* 49 = MAX_EA_BITS_PER_CONTEXT (hash specific). To make sure we pick
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index 54764c6e922d..138e46d8c04e 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -98,6 +98,16 @@ static inline void invalidate_dcache_range(unsigned long start,
mb(); /* sync */
}
+#ifdef CONFIG_4xx
+static inline void flush_instruction_cache(void)
+{
+ iccci((void *)KERNELBASE);
+ isync();
+}
+#else
+void flush_instruction_cache(void);
+#endif
+
#include <asm-generic/cacheflush.h>
#endif /* _ASM_POWERPC_CACHEFLUSH_H */
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 32a15dc49e8c..93bc70d4c9a1 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -9,11 +9,6 @@
#ifndef __ASSEMBLY__
-/*
- * Added to include __machine_check_early_realmode_* functions
- */
-#include <asm/mce.h>
-
/* This structure can grow, it's real size is used by head.S code
* via the mkdefs mechanism.
*/
@@ -170,6 +165,7 @@ static inline void cpu_feature_keys_init(void) { }
#else /* CONFIG_PPC32 */
/* Define these to 0 for the sake of tests in common code */
#define CPU_FTR_PPC_LE (0)
+#define CPU_FTR_SPE (0)
#endif
/*
@@ -299,8 +295,6 @@ static inline void cpu_feature_keys_init(void) { }
#define CPU_FTR_MAYBE_CAN_NAP 0
#endif
-#define CPU_FTRS_PPC601 (CPU_FTR_COMMON | \
- CPU_FTR_COHERENT_ICACHE)
#define CPU_FTRS_603 (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE | CPU_FTR_NOEXECUTE)
#define CPU_FTRS_604 (CPU_FTR_COMMON | CPU_FTR_PPC_LE)
@@ -516,10 +510,8 @@ static inline void cpu_feature_keys_init(void) { }
#else
enum {
CPU_FTRS_POSSIBLE =
-#ifdef CONFIG_PPC_BOOK3S_601
- CPU_FTRS_PPC601 |
-#elif defined(CONFIG_PPC_BOOK3S_32)
- CPU_FTRS_PPC601 | CPU_FTRS_603 | CPU_FTRS_604 | CPU_FTRS_740_NOTAU |
+#ifdef CONFIG_PPC_BOOK3S_32
+ CPU_FTRS_603 | CPU_FTRS_604 | CPU_FTRS_740_NOTAU |
CPU_FTRS_740 | CPU_FTRS_750 | CPU_FTRS_750FX1 |
CPU_FTRS_750FX2 | CPU_FTRS_750FX | CPU_FTRS_750GX |
CPU_FTRS_7400_NOTAU | CPU_FTRS_7400 | CPU_FTRS_7450_20 |
@@ -594,9 +586,7 @@ enum {
#else
enum {
CPU_FTRS_ALWAYS =
-#ifdef CONFIG_PPC_BOOK3S_601
- CPU_FTRS_PPC601 &
-#elif defined(CONFIG_PPC_BOOK3S_32)
+#ifdef CONFIG_PPC_BOOK3S_32
CPU_FTRS_603 & CPU_FTRS_604 & CPU_FTRS_740_NOTAU &
CPU_FTRS_740 & CPU_FTRS_750 & CPU_FTRS_750FX1 &
CPU_FTRS_750FX2 & CPU_FTRS_750FX & CPU_FTRS_750GX &
diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
index deb99fd6e060..98c8bd155bf9 100644
--- a/arch/powerpc/include/asm/cputhreads.h
+++ b/arch/powerpc/include/asm/cputhreads.h
@@ -23,7 +23,6 @@
extern int threads_per_core;
extern int threads_per_subcore;
extern int threads_shift;
-extern bool has_big_cores;
extern cpumask_t threads_core_mask;
#else
#define threads_per_core 1
diff --git a/arch/powerpc/include/asm/delay.h b/arch/powerpc/include/asm/delay.h
index 66963f7d3e64..51bb8c1476c7 100644
--- a/arch/powerpc/include/asm/delay.h
+++ b/arch/powerpc/include/asm/delay.h
@@ -54,7 +54,7 @@ extern void udelay(unsigned long usecs);
({ \
typeof(condition) __ret; \
unsigned long __loops = tb_ticks_per_usec * timeout; \
- unsigned long __start = get_tbl(); \
+ unsigned long __start = mftb(); \
\
if (delay) { \
while (!(__ret = (condition)) && \
diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index 17ccc6474ab6..bf2402fed3e0 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -8,26 +8,39 @@
#ifndef _ASM_POWERPC_LMB_H
#define _ASM_POWERPC_LMB_H
+#include <linux/sched.h>
+
struct drmem_lmb {
u64 base_addr;
u32 drc_index;
u32 aa_index;
u32 flags;
-#ifdef CONFIG_MEMORY_HOTPLUG
- int nid;
-#endif
};
struct drmem_lmb_info {
struct drmem_lmb *lmbs;
int n_lmbs;
- u32 lmb_size;
+ u64 lmb_size;
};
extern struct drmem_lmb_info *drmem_info;
+static inline struct drmem_lmb *drmem_lmb_next(struct drmem_lmb *lmb,
+ const struct drmem_lmb *start)
+{
+ /*
+ * DLPAR code paths can take several milliseconds per element
+ * when interacting with firmware. Ensure that we don't
+ * unfairly monopolize the CPU.
+ */
+ if (((++lmb - start) % 16) == 0)
+ cond_resched();
+
+ return lmb;
+}
+
#define for_each_drmem_lmb_in_range(lmb, start, end) \
- for ((lmb) = (start); (lmb) < (end); (lmb)++)
+ for ((lmb) = (start); (lmb) < (end); lmb = drmem_lmb_next(lmb, start))
#define for_each_drmem_lmb(lmb) \
for_each_drmem_lmb_in_range((lmb), \
@@ -67,7 +80,7 @@ struct of_drconf_cell_v2 {
#define DRCONF_MEM_RESERVED 0x00000080
#define DRCONF_MEM_HOTREMOVABLE 0x00000100
-static inline u32 drmem_lmb_size(void)
+static inline u64 drmem_lmb_size(void)
{
return drmem_info->lmb_size;
}
@@ -105,22 +118,4 @@ static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
lmb->aa_index = 0xffffffff;
}
-#ifdef CONFIG_MEMORY_HOTPLUG
-static inline void lmb_set_nid(struct drmem_lmb *lmb)
-{
- lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr);
-}
-static inline void lmb_clear_nid(struct drmem_lmb *lmb)
-{
- lmb->nid = -1;
-}
-#else
-static inline void lmb_set_nid(struct drmem_lmb *lmb)
-{
-}
-static inline void lmb_clear_nid(struct drmem_lmb *lmb)
-{
-}
-#endif
-
#endif /* _ASM_POWERPC_LMB_H */
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index d5f369bcd130..b1a5bba2e0b9 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -27,7 +27,6 @@ struct pci_dn;
#define EEH_FORCE_DISABLED 0x02 /* EEH disabled */
#define EEH_PROBE_MODE_DEV 0x04 /* From PCI device */
#define EEH_PROBE_MODE_DEVTREE 0x08 /* From device tree */
-#define EEH_VALID_PE_ZERO 0x10 /* PE#0 is valid */
#define EEH_ENABLE_IO_FOR_LOG 0x20 /* Enable IO for log */
#define EEH_EARLY_DUMP_LOG 0x40 /* Dump log immediately */
@@ -74,7 +73,6 @@ struct pci_dn;
struct eeh_pe {
int type; /* PE type: PHB/Bus/Device */
int state; /* PE EEH dependent mode */
- int config_addr; /* Traditional PCI address */
int addr; /* PE configuration address */
struct pci_controller *phb; /* Associated PHB */
struct pci_bus *bus; /* Top PCI bus for bus PE */
@@ -216,7 +214,6 @@ enum {
struct eeh_ops {
char *name;
- int (*init)(void);
struct eeh_dev *(*probe)(struct pci_dev *pdev);
int (*set_option)(struct eeh_pe *pe, int option);
int (*get_state)(struct eeh_pe *pe, int *delay);
@@ -281,8 +278,7 @@ int eeh_phb_pe_create(struct pci_controller *phb);
int eeh_wait_state(struct eeh_pe *pe, int max_wait);
struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb);
struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root);
-struct eeh_pe *eeh_pe_get(struct pci_controller *phb,
- int pe_no, int config_addr);
+struct eeh_pe *eeh_pe_get(struct pci_controller *phb, int pe_no);
int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent);
int eeh_pe_tree_remove(struct eeh_dev *edev);
void eeh_pe_update_time_stamp(struct eeh_pe *pe);
@@ -295,8 +291,7 @@ const char *eeh_pe_loc_get(struct eeh_pe *pe);
struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
void eeh_show_enabled(void);
-int __init eeh_ops_register(struct eeh_ops *ops);
-int __exit eeh_ops_unregister(const char *name);
+int __init eeh_init(struct eeh_ops *ops);
int eeh_check_failure(const volatile void __iomem *token);
int eeh_dev_check_failure(struct eeh_dev *edev);
void eeh_addr_cache_init(void);
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index fbb377055471..c1fbccb04390 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -375,11 +375,13 @@
#define H_CPU_CHAR_THREAD_RECONFIG_CTRL (1ull << 57) // IBM bit 6
#define H_CPU_CHAR_COUNT_CACHE_DISABLED (1ull << 56) // IBM bit 7
#define H_CPU_CHAR_BCCTR_FLUSH_ASSIST (1ull << 54) // IBM bit 9
+#define H_CPU_CHAR_BCCTR_LINK_FLUSH_ASSIST (1ull << 52) // IBM bit 11
#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0
#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1
#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2
#define H_CPU_BEHAV_FLUSH_COUNT_CACHE (1ull << 58) // IBM bit 5
+#define H_CPU_BEHAV_FLUSH_LINK_STACK (1ull << 57) // IBM bit 6
/* Flag values used in H_REGISTER_PROC_TBL hcall */
#define PROC_TABLE_OP_MASK 0x18
@@ -560,6 +562,42 @@ struct hv_guest_state {
/* Latest version of hv_guest_state structure */
#define HV_GUEST_STATE_VERSION 1
+/*
+ * From the document "H_GetPerformanceCounterInfo Interface" v1.07
+ *
+ * H_GET_PERF_COUNTER_INFO argument
+ */
+struct hv_get_perf_counter_info_params {
+ __be32 counter_request; /* I */
+ __be32 starting_index; /* IO */
+ __be16 secondary_index; /* IO */
+ __be16 returned_values; /* O */
+ __be32 detail_rc; /* O, only needed when called via *_norets() */
+
+ /*
+ * O, size each of counter_value element in bytes, only set for version
+ * >= 0x3
+ */
+ __be16 cv_element_size;
+
+ /* I, 0 (zero) for versions < 0x3 */
+ __u8 counter_info_version_in;
+
+ /* O, 0 (zero) if version < 0x3. Must be set to 0 when making hcall */
+ __u8 counter_info_version_out;
+ __u8 reserved[0xC];
+ __u8 counter_value[];
+} __packed;
+
+#define HGPCI_REQ_BUFFER_SIZE 4096
+#define HGPCI_MAX_DATA_BYTES \
+ (HGPCI_REQ_BUFFER_SIZE - sizeof(struct hv_get_perf_counter_info_params))
+
+struct hv_gpci_request_buffer {
+ struct hv_get_perf_counter_info_params params;
+ uint8_t bytes[HGPCI_MAX_DATA_BYTES];
+} __packed;
+
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HVCALL_H */
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index db206a7f38e2..abebfbee5b1c 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -10,6 +10,7 @@
#define _PPC_BOOK3S_64_HW_BREAKPOINT_H
#include <asm/cpu_has_feature.h>
+#include <asm/inst.h>
#ifdef __KERNEL__
struct arch_hw_breakpoint {
@@ -17,6 +18,7 @@ struct arch_hw_breakpoint {
u16 type;
u16 len; /* length of the target data symbol */
u16 hw_len; /* length programmed in hw */
+ u8 flags;
};
/* Note: Don't change the first 6 bits below as they are in the same order
@@ -36,12 +38,15 @@ struct arch_hw_breakpoint {
#define HW_BRK_TYPE_PRIV_ALL (HW_BRK_TYPE_USER | HW_BRK_TYPE_KERNEL | \
HW_BRK_TYPE_HYP)
+#define HW_BRK_FLAG_DISABLED 0x1
+
/* Minimum granularity */
#ifdef CONFIG_PPC_8xx
#define HW_BREAKPOINT_SIZE 0x4
#else
#define HW_BREAKPOINT_SIZE 0x8
#endif
+#define HW_BREAKPOINT_SIZE_QUADWORD 0x10
#define DABR_MAX_LEN 8
#define DAWR_MAX_LEN 512
@@ -51,6 +56,13 @@ static inline int nr_wp_slots(void)
return cpu_has_feature(CPU_FTR_DAWR1) ? 2 : 1;
}
+bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr,
+ unsigned long ea, int type, int size,
+ struct arch_hw_breakpoint *info);
+
+void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr,
+ int *type, int *size, unsigned long *ea);
+
#ifdef CONFIG_HAVE_HW_BREAKPOINT
#include <linux/kdebug.h>
#include <asm/reg.h>
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 35060be09073..0363734ff56e 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -25,9 +25,8 @@
#define PACA_IRQ_DBELL 0x02
#define PACA_IRQ_EE 0x04
#define PACA_IRQ_DEC 0x08 /* Or FIT */
-#define PACA_IRQ_EE_EDGE 0x10 /* BookE only */
-#define PACA_IRQ_HMI 0x20
-#define PACA_IRQ_PMI 0x40
+#define PACA_IRQ_HMI 0x10
+#define PACA_IRQ_PMI 0x20
/*
* Some soft-masked interrupts must be hard masked until they are replayed
@@ -369,12 +368,6 @@ static inline void may_hard_irq_enable(void) { }
#define ARCH_IRQ_INIT_FLAGS IRQ_NOREQUEST
-/*
- * interrupt-retrigger: should we handle this via lost interrupts and IPIs
- * or should we not care like we do now ? --BenH.
- */
-struct irq_chip;
-
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HW_IRQ_H */
diff --git a/arch/powerpc/include/asm/icswx.h b/arch/powerpc/include/asm/icswx.h
index b0c70a35fd0e..f6599ccb3012 100644
--- a/arch/powerpc/include/asm/icswx.h
+++ b/arch/powerpc/include/asm/icswx.h
@@ -156,8 +156,7 @@ struct coprocessor_request_block {
u8 reserved[32];
struct coprocessor_status_block csb;
-} __packed;
-
+} __aligned(128);
/* RFC02167 Initiate Coprocessor Instructions document
* Chapter 8.2.1.1.1 RS
@@ -188,6 +187,9 @@ static inline int icswx(__be32 ccw, struct coprocessor_request_block *crb)
__be64 ccw_reg = ccw;
u32 cr;
+ /* NB: the same structures are used by VAS-NX */
+ BUILD_BUG_ON(sizeof(*crb) != 128);
+
__asm__ __volatile__(
PPC_ICSWX(%1,0,%2) "\n"
"mfcr %0\n"
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 814dfab7e392..4f983ca4030a 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -35,7 +35,6 @@ static __inline__ int irq_canonicalize(int irq)
extern int distribute_irqs;
-struct irqaction;
struct pt_regs;
#define __ARCH_HAS_DO_SOFTIRQ
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index a90b892f0bfe..95081078aa8a 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -65,7 +65,6 @@ struct machdep_calls {
void __noreturn (*restart)(char *cmd);
void __noreturn (*halt)(void);
void (*panic)(char *str);
- void (*cpu_die)(void);
long (*time_init)(void); /* Optional, may be NULL */
@@ -222,8 +221,6 @@ struct machdep_calls {
extern void e500_idle(void);
extern void power4_idle(void);
-extern void power7_idle(void);
-extern void power9_idle(void);
extern void ppc6xx_idle(void);
extern void book3e_idle(void);
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 7f3658a97384..e02aa793420b 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -244,7 +244,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
*/
static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
{
- switch_mm(prev, next, current);
+ switch_mm_irqs_off(prev, next, current);
}
/* We don't currently use enter_lazy_tlb() for anything */
diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
index e752a5807a59..39be9aea86db 100644
--- a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
@@ -65,4 +65,18 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
pte_update(mm, addr, ptep, clr, set, 1);
}
+#ifdef CONFIG_PPC_4K_PAGES
+static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
+ struct page *page, int writable)
+{
+ size_t size = huge_page_size(hstate_vma(vma));
+
+ if (size == SZ_16K)
+ return __pte(pte_val(entry) & ~_PAGE_HUGE);
+ else
+ return entry;
+}
+#define arch_make_huge_pte arch_make_huge_pte
+#endif
+
#endif /* _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H */
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index b9e134d0f03a..ee2243ba96cf 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -227,6 +227,19 @@ static inline void pmd_clear(pmd_t *pmdp)
*/
#ifdef CONFIG_PPC_8xx
static pmd_t *pmd_off(struct mm_struct *mm, unsigned long addr);
+static int hugepd_ok(hugepd_t hpd);
+
+static int number_of_cells_per_pte(pmd_t *pmd, pte_basic_t val, int huge)
+{
+ if (!huge)
+ return PAGE_SIZE / SZ_4K;
+ else if (hugepd_ok(*((hugepd_t *)pmd)))
+ return 1;
+ else if (IS_ENABLED(CONFIG_PPC_4K_PAGES) && !(val & _PAGE_HUGE))
+ return SZ_16K / SZ_4K;
+ else
+ return SZ_512K / SZ_4K;
+}
static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p,
unsigned long clr, unsigned long set, int huge)
@@ -237,12 +250,7 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p
int num, i;
pmd_t *pmd = pmd_off(mm, addr);
- if (!huge)
- num = PAGE_SIZE / SZ_4K;
- else if ((pmd_val(*pmd) & _PMD_PAGE_MASK) != _PMD_PAGE_8M)
- num = SZ_512K / SZ_4K;
- else
- num = 1;
+ num = number_of_cells_per_pte(pmd, new, huge);
for (i = 0; i < num; i++, entry++, new += SZ_4K)
*entry = new;
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index 4b7c3472eab1..6277e7596ae5 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -140,11 +140,6 @@ static inline pte_t pte_mkold(pte_t pte)
return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
}
-static inline pte_t pte_mkpte(pte_t pte)
-{
- return pte;
-}
-
static inline pte_t pte_mkspecial(pte_t pte)
{
return __pte(pte_val(pte) | _PAGE_SPECIAL);
diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h
index ee79d2cd9fb6..d37ededca3ee 100644
--- a/arch/powerpc/include/asm/pnv-ocxl.h
+++ b/arch/powerpc/include/asm/pnv-ocxl.h
@@ -28,7 +28,4 @@ int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, void **p
void pnv_ocxl_spa_release(void *platform_data);
int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle);
-int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr);
-void pnv_ocxl_free_xive_irq(u32 irq);
-
#endif /* _ASM_PNV_OCXL_H */
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index b4cc6608131c..511786f0e40d 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -382,16 +382,6 @@ n:
#endif
/* various errata or part fixups */
-#ifdef CONFIG_PPC601_SYNC_FIX
-#define SYNC sync; isync
-#define SYNC_601 sync
-#define ISYNC_601 isync
-#else
-#define SYNC
-#define SYNC_601
-#define ISYNC_601
-#endif
-
#if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E)
#define MFTB(dest) \
90: mfspr dest, SPRN_TBRL; \
@@ -411,8 +401,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96)
#define MFTBU(dest) mfspr dest, SPRN_TBRU
#endif
-/* tlbsync is not implemented on 601 */
-#if !defined(CONFIG_SMP) || defined(CONFIG_PPC_BOOK3S_601)
+#ifndef CONFIG_SMP
#define TLBSYNC
#else
#define TLBSYNC tlbsync; sync
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index ed0d633ab5aa..365290b9a24b 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -220,6 +220,7 @@ struct thread_struct {
unsigned long tm_tar;
unsigned long tm_ppr;
unsigned long tm_dscr;
+ unsigned long tm_amr;
/*
* Checkpointed FP and VSX 0-31 register set.
@@ -432,16 +433,10 @@ enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
extern int powersave_nap; /* set if nap mode can be used in idle loop */
extern void power7_idle_type(unsigned long type);
-extern void power9_idle_type(unsigned long stop_psscr_val,
+extern void arch300_idle_type(unsigned long stop_psscr_val,
unsigned long stop_psscr_mask);
-extern void flush_instruction_cache(void);
-extern void hard_reset_now(void);
-extern void poweroff_now(void);
extern int fix_alignment(struct pt_regs *);
-extern void cvt_fd(float *from, double *to);
-extern void cvt_df(double *from, float *to);
-extern void _nmask_and_or_msr(unsigned long nmask, unsigned long or_val);
#ifdef CONFIG_PPC64
/*
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 155a197c0aa1..e2c778c176a3 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -243,11 +243,7 @@ static inline void set_trap_norestart(struct pt_regs *regs)
}
#define arch_has_single_step() (1)
-#ifndef CONFIG_PPC_BOOK3S_601
#define arch_has_block_step() (true)
-#else
-#define arch_has_block_step() (false)
-#endif
#define ARCH_HAS_USER_SINGLE_STEP_REPORT
/*
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 88fb88491fe9..f877a576b338 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -521,6 +521,8 @@
#define SPRN_TSCR 0x399 /* Thread Switch Control Register */
#define SPRN_DEC 0x016 /* Decrement Register */
+#define SPRN_PIT 0x3DB /* Programmable Interval Timer (40x/BOOKE) */
+
#define SPRN_DER 0x095 /* Debug Enable Register */
#define DER_RSTE 0x40000000 /* Reset Interrupt */
#define DER_CHSTPE 0x20000000 /* Check Stop */
@@ -817,7 +819,7 @@
#define THRM1_TIN (1 << 31)
#define THRM1_TIV (1 << 30)
#define THRM1_THRES(x) ((x&0x7f)<<23)
-#define THRM3_SITV(x) ((x&0x3fff)<<1)
+#define THRM3_SITV(x) ((x & 0x1fff) << 1)
#define THRM1_TID (1<<2)
#define THRM1_TIE (1<<1)
#define THRM1_V (1<<0)
@@ -1353,6 +1355,7 @@
#define PVR_POWER8NVL 0x004C
#define PVR_POWER8 0x004D
#define PVR_POWER9 0x004E
+#define PVR_POWER10 0x0080
#define PVR_BE 0x0070
#define PVR_PA6T 0x0090
@@ -1416,8 +1419,7 @@ static inline void msr_check_and_clear(unsigned long bits)
__msr_check_and_clear(bits);
}
-#ifdef __powerpc64__
-#if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E)
+#if defined(CONFIG_PPC_CELL) || defined(CONFIG_E500)
#define mftb() ({unsigned long rval; \
asm volatile( \
"90: mfspr %0, %2;\n" \
@@ -1427,29 +1429,23 @@ static inline void msr_check_and_clear(unsigned long bits)
: "=r" (rval) \
: "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \
rval;})
+#elif defined(CONFIG_PPC_8xx)
+#define mftb() ({unsigned long rval; \
+ asm volatile("mftbl %0" : "=r" (rval)); rval;})
#else
#define mftb() ({unsigned long rval; \
asm volatile("mfspr %0, %1" : \
"=r" (rval) : "i" (SPRN_TBRL)); rval;})
#endif /* !CONFIG_PPC_CELL */
-#else /* __powerpc64__ */
-
#if defined(CONFIG_PPC_8xx)
-#define mftbl() ({unsigned long rval; \
- asm volatile("mftbl %0" : "=r" (rval)); rval;})
#define mftbu() ({unsigned long rval; \
asm volatile("mftbu %0" : "=r" (rval)); rval;})
#else
-#define mftbl() ({unsigned long rval; \
- asm volatile("mfspr %0, %1" : "=r" (rval) : \
- "i" (SPRN_TBRL)); rval;})
#define mftbu() ({unsigned long rval; \
asm volatile("mfspr %0, %1" : "=r" (rval) : \
"i" (SPRN_TBRU)); rval;})
#endif
-#define mftb() mftbl()
-#endif /* !__powerpc64__ */
#define mttbl(v) asm volatile("mttbl %0":: "r"(v))
#define mttbu(v) asm volatile("mttbu %0":: "r"(v))
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index ff30f1076162..29a948e0c0f2 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -174,7 +174,6 @@
#define SPRN_L1CSR1 0x3F3 /* L1 Cache Control and Status Register 1 */
#define SPRN_MMUCSR0 0x3F4 /* MMU Control and Status Register 0 */
#define SPRN_MMUCFG 0x3F7 /* MMU Configuration Register */
-#define SPRN_PIT 0x3DB /* Programmable Interval Timer */
#define SPRN_BUCSR 0x3F5 /* Branch Unit Control and Status */
#define SPRN_L2CSR0 0x3F9 /* L2 Data Cache Control and Status Register 0 */
#define SPRN_L2CSR1 0x3FA /* L2 Data Cache Control and Status Register 1 */
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 49a25e2400f2..b2035b2f57ce 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -28,8 +28,8 @@
extern int boot_cpuid;
extern int spinning_secondaries;
extern u32 *cpu_to_phys_id;
+extern bool coregroup_enabled;
-extern void cpu_die(void);
extern int cpu_to_chip_id(int cpu);
#ifdef CONFIG_SMP
@@ -50,6 +50,9 @@ struct smp_ops_t {
int (*cpu_disable)(void);
void (*cpu_die)(unsigned int nr);
int (*cpu_bootable)(unsigned int nr);
+#ifdef CONFIG_HOTPLUG_CPU
+ void (*cpu_offline_self)(void);
+#endif
};
extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
@@ -118,11 +121,6 @@ static inline struct cpumask *cpu_sibling_mask(int cpu)
return per_cpu(cpu_sibling_map, cpu);
}
-static inline struct cpumask *cpu_core_mask(int cpu)
-{
- return per_cpu(cpu_core_map, cpu);
-}
-
static inline struct cpumask *cpu_l2_cache_mask(int cpu)
{
return per_cpu(cpu_l2_cache_map, cpu);
@@ -135,6 +133,19 @@ static inline struct cpumask *cpu_smallcore_mask(int cpu)
extern int cpu_to_core_id(int cpu);
+extern bool has_big_cores;
+
+#define cpu_smt_mask cpu_smt_mask
+#ifdef CONFIG_SCHED_SMT
+static inline const struct cpumask *cpu_smt_mask(int cpu)
+{
+ if (has_big_cores)
+ return per_cpu(cpu_smallcore_map, cpu);
+
+ return per_cpu(cpu_sibling_map, cpu);
+}
+#endif /* CONFIG_SCHED_SMT */
+
/* Since OpenPIC has only 4 IPIs, we use slightly different message numbers.
*
* Make sure this matches openpic_request_IPIs in open_pic.c, or what shows up
@@ -243,7 +254,6 @@ extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
* 64-bit but defining them all here doesn't harm
*/
extern void generic_secondary_smp_init(void);
-extern void generic_secondary_thread_init(void);
extern unsigned long __secondary_hold_spinloop;
extern unsigned long __secondary_hold_acknowledge;
extern char __secondary_hold;
diff --git a/arch/powerpc/include/asm/svm.h b/arch/powerpc/include/asm/svm.h
index 85580b30aba4..7546402d796a 100644
--- a/arch/powerpc/include/asm/svm.h
+++ b/arch/powerpc/include/asm/svm.h
@@ -15,6 +15,8 @@ static inline bool is_secure_guest(void)
return mfmsr() & MSR_S;
}
+void __init svm_swiotlb_init(void);
+
void dtl_cache_ctor(void *addr);
#define get_dtl_cache_ctor() (is_secure_guest() ? dtl_cache_ctor : NULL)
@@ -25,6 +27,8 @@ static inline bool is_secure_guest(void)
return false;
}
+static inline void svm_swiotlb_init(void) {}
+
#define get_dtl_cache_ctor() NULL
#endif /* CONFIG_PPC_SVM */
diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h
index aca70fb43147..1d67bc8d7bc6 100644
--- a/arch/powerpc/include/asm/synch.h
+++ b/arch/powerpc/include/asm/synch.h
@@ -3,8 +3,9 @@
#define _ASM_POWERPC_SYNCH_H
#ifdef __KERNEL__
+#include <asm/cputable.h>
#include <asm/feature-fixups.h>
-#include <asm/asm-const.h>
+#include <asm/ppc-opcode.h>
#ifndef __ASSEMBLY__
extern unsigned int __start___lwsync_fixup, __stop___lwsync_fixup;
@@ -20,6 +21,22 @@ static inline void isync(void)
{
__asm__ __volatile__ ("isync" : : : "memory");
}
+
+static inline void ppc_after_tlbiel_barrier(void)
+{
+ asm volatile("ptesync": : :"memory");
+ /*
+ * POWER9, POWER10 need a cp_abort after tlbiel to ensure the copy is
+ * invalidated correctly. If this is not done, the paste can take data
+ * from the physical address that was translated at copy time.
+ *
+ * POWER9 in practice does not need this, because address spaces with
+ * accelerators mapped will use tlbie (which does invalidate the copy)
+ * to invalidate translations. It's not possible to limit POWER10 this
+ * way due to local copy-paste.
+ */
+ asm volatile(ASM_FTR_IFSET(PPC_CP_ABORT, "", %0) : : "i" (CPU_FTR_ARCH_31) : "memory");
+}
#endif /* __ASSEMBLY__ */
#if defined(__powerpc64__)
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index cb326720a8a1..2f566c1a754c 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -38,44 +38,10 @@ struct div_result {
u64 result_low;
};
-/* Accessor functions for the timebase (RTC on 601) registers. */
-#define __USE_RTC() (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
-
-#ifdef CONFIG_PPC64
-
/* For compatibility, get_tbl() is defined as get_tb() on ppc64 */
-#define get_tbl get_tb
-
-#else
-
static inline unsigned long get_tbl(void)
{
- return mftbl();
-}
-
-static inline unsigned int get_tbu(void)
-{
- return mftbu();
-}
-#endif /* !CONFIG_PPC64 */
-
-static inline unsigned int get_rtcl(void)
-{
- unsigned int rtcl;
-
- asm volatile("mfrtcl %0" : "=r" (rtcl));
- return rtcl;
-}
-
-static inline u64 get_rtc(void)
-{
- unsigned int hi, lo, hi2;
-
- do {
- asm volatile("mfrtcu %0; mfrtcl %1; mfrtcu %2"
- : "=r" (hi), "=r" (lo), "=r" (hi2));
- } while (hi2 != hi);
- return (u64)hi * 1000000000 + lo;
+ return mftb();
}
static inline u64 get_vtb(void)
@@ -87,30 +53,21 @@ static inline u64 get_vtb(void)
return 0;
}
-#ifdef CONFIG_PPC64
-static inline u64 get_tb(void)
-{
- return mftb();
-}
-#else /* CONFIG_PPC64 */
static inline u64 get_tb(void)
{
unsigned int tbhi, tblo, tbhi2;
+ if (IS_ENABLED(CONFIG_PPC64))
+ return mftb();
+
do {
- tbhi = get_tbu();
- tblo = get_tbl();
- tbhi2 = get_tbu();
+ tbhi = mftbu();
+ tblo = mftb();
+ tbhi2 = mftbu();
} while (tbhi != tbhi2);
return ((u64)tbhi << 32) | tblo;
}
-#endif /* !CONFIG_PPC64 */
-
-static inline u64 get_tb_or_rtc(void)
-{
- return __USE_RTC() ? get_rtc() : get_tb();
-}
static inline void set_tb(unsigned int upper, unsigned int lower)
{
@@ -127,11 +84,10 @@ static inline void set_tb(unsigned int upper, unsigned int lower)
*/
static inline u64 get_dec(void)
{
-#if defined(CONFIG_40x)
- return (mfspr(SPRN_PIT));
-#else
- return (mfspr(SPRN_DEC));
-#endif
+ if (IS_ENABLED(CONFIG_40x))
+ return mfspr(SPRN_PIT);
+
+ return mfspr(SPRN_DEC);
}
/*
@@ -141,23 +97,17 @@ static inline u64 get_dec(void)
*/
static inline void set_dec(u64 val)
{
-#if defined(CONFIG_40x)
- mtspr(SPRN_PIT, (u32) val);
-#else
-#ifndef CONFIG_BOOKE
- --val;
-#endif
- mtspr(SPRN_DEC, val);
-#endif /* not 40x */
+ if (IS_ENABLED(CONFIG_40x))
+ mtspr(SPRN_PIT, (u32)val);
+ else if (IS_ENABLED(CONFIG_BOOKE))
+ mtspr(SPRN_DEC, val);
+ else
+ mtspr(SPRN_DEC, val - 1);
}
static inline unsigned long tb_ticks_since(unsigned long tstamp)
{
- if (__USE_RTC()) {
- int delta = get_rtcl() - (unsigned int) tstamp;
- return delta < 0 ? delta + 1000000000 : delta;
- }
- return get_tbl() - tstamp;
+ return mftb() - tstamp;
}
#define mulhwu(x,y) \
diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h
index 6047402b0a4d..95988870a57b 100644
--- a/arch/powerpc/include/asm/timex.h
+++ b/arch/powerpc/include/asm/timex.h
@@ -17,9 +17,6 @@ typedef unsigned long cycles_t;
static inline cycles_t get_cycles(void)
{
- if (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
- return 0;
-
return mftb();
}
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index fbc6f3002f23..d97f061fecac 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -66,19 +66,6 @@ static inline int mm_is_thread_local(struct mm_struct *mm)
return false;
return cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm));
}
-static inline void mm_reset_thread_local(struct mm_struct *mm)
-{
- WARN_ON(atomic_read(&mm->context.copros) > 0);
- /*
- * It's possible for mm_access to take a reference on mm_users to
- * access the remote mm from another thread, but it's not allowed
- * to set mm_cpumask, so mm_users may be > 1 here.
- */
- WARN_ON(current->mm != mm);
- atomic_set(&mm->context.active_cpus, 1);
- cpumask_clear(mm_cpumask(mm));
- cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
-}
#else /* CONFIG_PPC_BOOK3S_64 */
static inline int mm_is_thread_local(struct mm_struct *mm)
{
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index f0b6300e7dd3..8728590f514a 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -86,14 +86,27 @@ static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
#endif /* CONFIG_NUMA */
+struct drmem_lmb;
+int of_drconf_to_nid_single(struct drmem_lmb *lmb);
+
#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
extern int find_and_online_cpu_nid(int cpu);
+extern int cpu_to_coregroup_id(int cpu);
#else
static inline int find_and_online_cpu_nid(int cpu)
{
return 0;
}
+static inline int cpu_to_coregroup_id(int cpu)
+{
+#ifdef CONFIG_SMP
+ return cpu_to_core_id(cpu);
+#else
+ return 0;
+#endif
+}
+
#endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
#include <asm-generic/topology.h>
@@ -104,15 +117,10 @@ static inline int find_and_online_cpu_nid(int cpu)
#ifdef CONFIG_PPC64
#include <asm/smp.h>
-#ifdef CONFIG_PPC_SPLPAR
-int get_physical_package_id(int cpu);
-#define topology_physical_package_id(cpu) (get_physical_package_id(cpu))
-#else
#define topology_physical_package_id(cpu) (cpu_to_chip_id(cpu))
-#endif
#define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu))
-#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu))
+#define topology_core_cpumask(cpu) (cpu_cpu_mask(cpu))
#define topology_core_id(cpu) (cpu_to_core_id(cpu))
#endif
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 20a35373cafc..604d705f1bb8 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -151,52 +151,16 @@ static inline int __access_ok(unsigned long addr, unsigned long size,
extern long __put_user_bad(void);
-/*
- * We don't tell gcc that we are accessing memory, but this is OK
- * because we do not write to any memory gcc knows about, so there
- * are no aliasing issues.
- */
-#define __put_user_asm(x, addr, err, op) \
- __asm__ __volatile__( \
- "1: " op " %1,0(%2) # put_user\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: li %0,%3\n" \
- " b 2b\n" \
- ".previous\n" \
- EX_TABLE(1b, 3b) \
- : "=r" (err) \
- : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
-
-#ifdef __powerpc64__
-#define __put_user_asm2(x, ptr, retval) \
- __put_user_asm(x, ptr, retval, "std")
-#else /* __powerpc64__ */
-#define __put_user_asm2(x, addr, err) \
- __asm__ __volatile__( \
- "1: stw %1,0(%2)\n" \
- "2: stw %1+1,4(%2)\n" \
- "3:\n" \
- ".section .fixup,\"ax\"\n" \
- "4: li %0,%3\n" \
- " b 3b\n" \
- ".previous\n" \
- EX_TABLE(1b, 4b) \
- EX_TABLE(2b, 4b) \
- : "=r" (err) \
- : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
-#endif /* __powerpc64__ */
-
#define __put_user_size_allowed(x, ptr, size, retval) \
do { \
+ __label__ __pu_failed; \
+ \
retval = 0; \
- switch (size) { \
- case 1: __put_user_asm(x, ptr, retval, "stb"); break; \
- case 2: __put_user_asm(x, ptr, retval, "sth"); break; \
- case 4: __put_user_asm(x, ptr, retval, "stw"); break; \
- case 8: __put_user_asm2(x, ptr, retval); break; \
- default: __put_user_bad(); \
- } \
+ __put_user_size_goto(x, ptr, size, __pu_failed); \
+ break; \
+ \
+__pu_failed: \
+ retval = -EFAULT; \
} while (0)
#define __put_user_size(x, ptr, size, retval) \
@@ -249,12 +213,17 @@ do { \
})
+/*
+ * We don't tell gcc that we are accessing memory, but this is OK
+ * because we do not write to any memory gcc knows about, so there
+ * are no aliasing issues.
+ */
#define __put_user_asm_goto(x, addr, label, op) \
asm volatile goto( \
"1: " op "%U1%X1 %0,%1 # put_user\n" \
EX_TABLE(1b, %l2) \
: \
- : "r" (x), "m" (*addr) \
+ : "r" (x), "m<>" (*addr) \
: \
: label)
@@ -316,7 +285,7 @@ extern long __get_user_bad(void);
#define __get_user_asm(x, addr, err, op) \
__asm__ __volatile__( \
- "1: "op" %1,0(%2) # get_user\n" \
+ "1: "op"%U2%X2 %1, %2 # get_user\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: li %0,%3\n" \
@@ -325,7 +294,7 @@ extern long __get_user_bad(void);
".previous\n" \
EX_TABLE(1b, 3b) \
: "=r" (err), "=r" (x) \
- : "b" (addr), "i" (-EFAULT), "0" (err))
+ : "m<>" (*addr), "i" (-EFAULT), "0" (err))
#ifdef __powerpc64__
#define __get_user_asm2(x, addr, err) \
@@ -333,8 +302,8 @@ extern long __get_user_bad(void);
#else /* __powerpc64__ */
#define __get_user_asm2(x, addr, err) \
__asm__ __volatile__( \
- "1: lwz %1,0(%2)\n" \
- "2: lwz %1+1,4(%2)\n" \
+ "1: lwz%X2 %1, %2\n" \
+ "2: lwz%X2 %L1, %L2\n" \
"3:\n" \
".section .fixup,\"ax\"\n" \
"4: li %0,%3\n" \
@@ -345,7 +314,7 @@ extern long __get_user_bad(void);
EX_TABLE(1b, 4b) \
EX_TABLE(2b, 4b) \
: "=r" (err), "=&r" (x) \
- : "b" (addr), "i" (-EFAULT), "0" (err))
+ : "m" (*addr), "i" (-EFAULT), "0" (err))
#endif /* __powerpc64__ */
#define __get_user_size_allowed(x, ptr, size, retval) \
@@ -355,10 +324,10 @@ do { \
if (size > sizeof(x)) \
(x) = __get_user_bad(); \
switch (size) { \
- case 1: __get_user_asm(x, ptr, retval, "lbz"); break; \
- case 2: __get_user_asm(x, ptr, retval, "lhz"); break; \
- case 4: __get_user_asm(x, ptr, retval, "lwz"); break; \
- case 8: __get_user_asm2(x, ptr, retval); break; \
+ case 1: __get_user_asm(x, (u8 __user *)ptr, retval, "lbz"); break; \
+ case 2: __get_user_asm(x, (u16 __user *)ptr, retval, "lhz"); break; \
+ case 4: __get_user_asm(x, (u32 __user *)ptr, retval, "lwz"); break; \
+ case 8: __get_user_asm2(x, (u64 __user *)ptr, retval); break; \
default: (x) = __get_user_bad(); \
} \
} while (0)
diff --git a/arch/powerpc/include/uapi/asm/ptrace.h b/arch/powerpc/include/uapi/asm/ptrace.h
index f5f1ccc740fc..7004cfea3f5f 100644
--- a/arch/powerpc/include/uapi/asm/ptrace.h
+++ b/arch/powerpc/include/uapi/asm/ptrace.h
@@ -222,6 +222,7 @@ struct ppc_debug_info {
#define PPC_DEBUG_FEATURE_DATA_BP_RANGE 0x0000000000000004
#define PPC_DEBUG_FEATURE_DATA_BP_MASK 0x0000000000000008
#define PPC_DEBUG_FEATURE_DATA_BP_DAWR 0x0000000000000010
+#define PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 0x0000000000000020
#ifndef __ASSEMBLY__
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index cbf41fb4ee89..bf0bf1b900d2 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -45,7 +45,8 @@ obj-y := cputable.o syscalls.o \
signal.o sysfs.o cacheinfo.o time.o \
prom.o traps.o setup-common.o \
udbg.o misc.o io.o misc_$(BITS).o \
- of_platform.o prom_parse.o firmware.o
+ of_platform.o prom_parse.o firmware.o \
+ hw_breakpoint_constraints.o
obj-y += ptrace/
obj-$(CONFIG_PPC64) += setup_64.o \
paca.o nvram_64.o note.o syscall_64.o
@@ -94,7 +95,8 @@ obj-$(CONFIG_PPC_FSL_BOOK3E) += cpu_setup_fsl_booke.o
obj-$(CONFIG_PPC_DOORBELL) += dbell.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
-extra-y := head_$(BITS).o
+extra-$(CONFIG_PPC64) := head_64.o
+extra-$(CONFIG_PPC_BOOK3S_32) := head_book3s_32.o
extra-$(CONFIG_40x) := head_40x.o
extra-$(CONFIG_44x) := head_44x.o
extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 8711c2164b45..c2722ff36e98 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -176,6 +176,7 @@ int main(void)
OFFSET(THREAD_TM_TAR, thread_struct, tm_tar);
OFFSET(THREAD_TM_PPR, thread_struct, tm_ppr);
OFFSET(THREAD_TM_DSCR, thread_struct, tm_dscr);
+ OFFSET(THREAD_TM_AMR, thread_struct, tm_amr);
OFFSET(PT_CKPT_REGS, thread_struct, ckpt_regs);
OFFSET(THREAD_CKVRSTATE, thread_struct, ckvr_state.vr);
OFFSET(THREAD_CKVRSAVE, thread_struct, ckvrsave);
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index 02300edc6989..c22a8e0dbc93 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -95,19 +95,10 @@ void __init btext_prepare_BAT(void)
boot_text_mapped = 0;
return;
}
- if (PVR_VER(mfspr(SPRN_PVR)) != 1) {
- /* 603, 604, G3, G4, ... */
- lowbits = addr & ~0xFF000000UL;
- addr &= 0xFF000000UL;
- disp_BAT[0] = vaddr | (BL_16M<<2) | 2;
- disp_BAT[1] = addr | (_PAGE_NO_CACHE | _PAGE_GUARDED | BPP_RW);
- } else {
- /* 601 */
- lowbits = addr & ~0xFF800000UL;
- addr &= 0xFF800000UL;
- disp_BAT[0] = vaddr | (_PAGE_NO_CACHE | PP_RWXX) | 4;
- disp_BAT[1] = addr | BL_8M | 0x40;
- }
+ lowbits = addr & ~0xFF000000UL;
+ addr &= 0xFF000000UL;
+ disp_BAT[0] = vaddr | (BL_16M<<2) | 2;
+ disp_BAT[1] = addr | (_PAGE_NO_CACHE | _PAGE_GUARDED | BPP_RW);
logicalDisplayBase = (void *) (vaddr + lowbits);
}
#endif
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 2aa89c6b2896..492c0b36aff6 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -16,6 +16,7 @@
#include <asm/oprofile_impl.h>
#include <asm/cputable.h>
#include <asm/prom.h> /* for PTRRELOC on ARCH=ppc */
+#include <asm/mce.h>
#include <asm/mmu.h>
#include <asm/setup.h>
@@ -608,21 +609,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC32
-#ifdef CONFIG_PPC_BOOK3S_601
- { /* 601 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00010000,
- .cpu_name = "601",
- .cpu_features = CPU_FTRS_PPC601,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_601_INSTR |
- PPC_FEATURE_UNIFIED_CACHE | PPC_FEATURE_NO_TB,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_generic,
- .platform = "ppc601",
- },
-#endif /* CONFIG_PPC_BOOK3S_601 */
#ifdef CONFIG_PPC_BOOK3S_6xx
{ /* 603 */
.pvr_mask = 0xffff0000,
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index f204ad79b6b5..1098863e17ee 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -17,6 +17,7 @@
#include <asm/cputable.h>
#include <asm/dt_cpu_ftrs.h>
+#include <asm/mce.h>
#include <asm/mmu.h>
#include <asm/oprofile_impl.h>
#include <asm/prom.h>
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 94682382fc8c..0e160dffcb86 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -466,7 +466,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
return 0;
}
- if (!pe->addr && !pe->config_addr) {
+ if (!pe->addr) {
eeh_stats.no_cfg_addr++;
return 0;
}
@@ -929,56 +929,6 @@ void eeh_save_bars(struct eeh_dev *edev)
edev->config_space[1] |= PCI_COMMAND_MASTER;
}
-/**
- * eeh_ops_register - Register platform dependent EEH operations
- * @ops: platform dependent EEH operations
- *
- * Register the platform dependent EEH operation callback
- * functions. The platform should call this function before
- * any other EEH operations.
- */
-int __init eeh_ops_register(struct eeh_ops *ops)
-{
- if (!ops->name) {
- pr_warn("%s: Invalid EEH ops name for %p\n",
- __func__, ops);
- return -EINVAL;
- }
-
- if (eeh_ops && eeh_ops != ops) {
- pr_warn("%s: EEH ops of platform %s already existing (%s)\n",
- __func__, eeh_ops->name, ops->name);
- return -EEXIST;
- }
-
- eeh_ops = ops;
-
- return 0;
-}
-
-/**
- * eeh_ops_unregister - Unreigster platform dependent EEH operations
- * @name: name of EEH platform operations
- *
- * Unregister the platform dependent EEH operation callback
- * functions.
- */
-int __exit eeh_ops_unregister(const char *name)
-{
- if (!name || !strlen(name)) {
- pr_warn("%s: Invalid EEH ops name\n",
- __func__);
- return -EINVAL;
- }
-
- if (eeh_ops && !strcmp(eeh_ops->name, name)) {
- eeh_ops = NULL;
- return 0;
- }
-
- return -EEXIST;
-}
-
static int eeh_reboot_notifier(struct notifier_block *nb,
unsigned long action, void *unused)
{
@@ -990,54 +940,6 @@ static struct notifier_block eeh_reboot_nb = {
.notifier_call = eeh_reboot_notifier,
};
-/**
- * eeh_init - EEH initialization
- *
- * Initialize EEH by trying to enable it for all of the adapters in the system.
- * As a side effect we can determine here if eeh is supported at all.
- * Note that we leave EEH on so failed config cycles won't cause a machine
- * check. If a user turns off EEH for a particular adapter they are really
- * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
- * grant access to a slot if EEH isn't enabled, and so we always enable
- * EEH for all slots/all devices.
- *
- * The eeh-force-off option disables EEH checking globally, for all slots.
- * Even if force-off is set, the EEH hardware is still enabled, so that
- * newer systems can boot.
- */
-static int eeh_init(void)
-{
- struct pci_controller *hose, *tmp;
- int ret = 0;
-
- /* Register reboot notifier */
- ret = register_reboot_notifier(&eeh_reboot_nb);
- if (ret) {
- pr_warn("%s: Failed to register notifier (%d)\n",
- __func__, ret);
- return ret;
- }
-
- /* call platform initialization function */
- if (!eeh_ops) {
- pr_warn("%s: Platform EEH operation not found\n",
- __func__);
- return -EEXIST;
- } else if ((ret = eeh_ops->init()))
- return ret;
-
- /* Initialize PHB PEs */
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
- eeh_phb_pe_create(hose);
-
- eeh_addr_cache_init();
-
- /* Initialize EEH event */
- return eeh_event_init();
-}
-
-core_initcall_sync(eeh_init);
-
static int eeh_device_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
@@ -1062,12 +964,47 @@ static struct notifier_block eeh_device_nb = {
.notifier_call = eeh_device_notifier,
};
-static __init int eeh_set_bus_notifier(void)
+/**
+ * eeh_init - System wide EEH initialization
+ *
+ * It's the platform's job to call this from an arch_initcall().
+ */
+int eeh_init(struct eeh_ops *ops)
{
- bus_register_notifier(&pci_bus_type, &eeh_device_nb);
- return 0;
+ struct pci_controller *hose, *tmp;
+ int ret = 0;
+
+ /* the platform should only initialise EEH once */
+ if (WARN_ON(eeh_ops))
+ return -EEXIST;
+ if (WARN_ON(!ops))
+ return -ENOENT;
+ eeh_ops = ops;
+
+ /* Register reboot notifier */
+ ret = register_reboot_notifier(&eeh_reboot_nb);
+ if (ret) {
+ pr_warn("%s: Failed to register reboot notifier (%d)\n",
+ __func__, ret);
+ return ret;
+ }
+
+ ret = bus_register_notifier(&pci_bus_type, &eeh_device_nb);
+ if (ret) {
+ pr_warn("%s: Failed to register bus notifier (%d)\n",
+ __func__, ret);
+ return ret;
+ }
+
+ /* Initialize PHB PEs */
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+ eeh_phb_pe_create(hose);
+
+ eeh_addr_cache_init();
+
+ /* Initialize EEH event */
+ return eeh_event_init();
}
-arch_initcall(eeh_set_bus_notifier);
/**
* eeh_probe_device() - Perform EEH initialization for the indicated pci device
@@ -1720,7 +1657,7 @@ static ssize_t eeh_force_recover_write(struct file *filp,
return -ENODEV;
/* Retrieve PE */
- pe = eeh_pe_get(hose, pe_no, 0);
+ pe = eeh_pe_get(hose, pe_no);
if (!pe)
return -ENODEV;
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index d2aaaa73fdd5..845e024321d4 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -251,43 +251,21 @@ void eeh_pe_dev_traverse(struct eeh_pe *root,
/**
* __eeh_pe_get - Check the PE address
- * @data: EEH PE
- * @flag: EEH device
*
* For one particular PE, it can be identified by PE address
* or tranditional BDF address. BDF address is composed of
* Bus/Device/Function number. The extra data referred by flag
* indicates which type of address should be used.
*/
-struct eeh_pe_get_flag {
- int pe_no;
- int config_addr;
-};
-
static void *__eeh_pe_get(struct eeh_pe *pe, void *flag)
{
- struct eeh_pe_get_flag *tmp = (struct eeh_pe_get_flag *) flag;
+ int *target_pe = flag;
- /* Unexpected PHB PE */
+ /* PHB PEs are special and should be ignored */
if (pe->type & EEH_PE_PHB)
return NULL;
- /*
- * We prefer PE address. For most cases, we should
- * have non-zero PE address
- */
- if (eeh_has_flag(EEH_VALID_PE_ZERO)) {
- if (tmp->pe_no == pe->addr)
- return pe;
- } else {
- if (tmp->pe_no &&
- (tmp->pe_no == pe->addr))
- return pe;
- }
-
- /* Try BDF address */
- if (tmp->config_addr &&
- (tmp->config_addr == pe->config_addr))
+ if (*target_pe == pe->addr)
return pe;
return NULL;
@@ -297,7 +275,6 @@ static void *__eeh_pe_get(struct eeh_pe *pe, void *flag)
* eeh_pe_get - Search PE based on the given address
* @phb: PCI controller
* @pe_no: PE number
- * @config_addr: Config address
*
* Search the corresponding PE based on the specified address which
* is included in the eeh device. The function is used to check if
@@ -306,16 +283,11 @@ static void *__eeh_pe_get(struct eeh_pe *pe, void *flag)
* which is composed of PCI bus/device/function number, or unified
* PE address.
*/
-struct eeh_pe *eeh_pe_get(struct pci_controller *phb,
- int pe_no, int config_addr)
+struct eeh_pe *eeh_pe_get(struct pci_controller *phb, int pe_no)
{
struct eeh_pe *root = eeh_phb_pe_get(phb);
- struct eeh_pe_get_flag tmp = { pe_no, config_addr };
- struct eeh_pe *pe;
- pe = eeh_pe_traverse(root, __eeh_pe_get, &tmp);
-
- return pe;
+ return eeh_pe_traverse(root, __eeh_pe_get, &pe_no);
}
/**
@@ -336,19 +308,13 @@ int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent)
struct pci_controller *hose = edev->controller;
struct eeh_pe *pe, *parent;
- /* Check if the PE number is valid */
- if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) {
- eeh_edev_err(edev, "PE#0 is invalid for this PHB!\n");
- return -EINVAL;
- }
-
/*
* Search the PE has been existing or not according
* to the PE address. If that has been existing, the
* PE should be composed of PCI bus and its subordinate
* components.
*/
- pe = eeh_pe_get(hose, edev->pe_config_addr, edev->bdfn);
+ pe = eeh_pe_get(hose, edev->pe_config_addr);
if (pe) {
if (pe->type & EEH_PE_INVALID) {
list_add_tail(&edev->entry, &pe->edevs);
@@ -388,8 +354,8 @@ int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent)
pr_err("%s: out of memory!\n", __func__);
return -ENOMEM;
}
- pe->addr = edev->pe_config_addr;
- pe->config_addr = edev->bdfn;
+
+ pe->addr = edev->pe_config_addr;
/*
* Put the new EEH PE into hierarchy tree. If the parent
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index f4d0af8e1136..8cdc8bcde703 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -234,7 +234,6 @@ transfer_to_handler_cont:
mtspr SPRN_SRR0,r11
mtspr SPRN_SRR1,r10
mtlr r9
- SYNC
RFI /* jump to handler, enable MMU */
#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
@@ -264,7 +263,6 @@ _ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont)
LOAD_REG_IMMEDIATE(r0, MSR_KERNEL)
mtspr SPRN_SRR0,r12
mtspr SPRN_SRR1,r0
- SYNC
RFI
reenable_mmu:
@@ -323,7 +321,6 @@ stack_ovf:
#endif
mtspr SPRN_SRR0,r9
mtspr SPRN_SRR1,r10
- SYNC
RFI
_ASM_NOKPROBE_SYMBOL(stack_ovf)
#endif
@@ -411,7 +408,6 @@ ret_from_syscall:
/* disable interrupts so current_thread_info()->flags can't change */
LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) /* doesn't include MSR_EE */
/* Note: We don't bother telling lockdep about it */
- SYNC
mtmsr r10
lwz r9,TI_FLAGS(r2)
li r8,-MAX_ERRNO
@@ -474,7 +470,6 @@ syscall_exit_finish:
#endif
mtspr SPRN_SRR0,r7
mtspr SPRN_SRR1,r8
- SYNC
RFI
_ASM_NOKPROBE_SYMBOL(syscall_exit_finish)
#ifdef CONFIG_44x
@@ -567,7 +562,6 @@ syscall_exit_work:
* lockdep as we are supposed to have IRQs on at this point
*/
ori r10,r10,MSR_EE
- SYNC
mtmsr r10
/* Save NVGPRS if they're not saved already */
@@ -606,7 +600,6 @@ ret_from_kernel_syscall:
#endif
mtspr SPRN_SRR0, r9
mtspr SPRN_SRR1, r10
- SYNC
RFI
_ASM_NOKPROBE_SYMBOL(ret_from_kernel_syscall)
@@ -810,7 +803,6 @@ fast_exception_return:
REST_GPR(9, r11)
REST_GPR(12, r11)
lwz r11,GPR11(r11)
- SYNC
RFI
_ASM_NOKPROBE_SYMBOL(fast_exception_return)
@@ -819,19 +811,11 @@ _ASM_NOKPROBE_SYMBOL(fast_exception_return)
1: lis r3,exc_exit_restart_end@ha
addi r3,r3,exc_exit_restart_end@l
cmplw r12,r3
-#ifdef CONFIG_PPC_BOOK3S_601
- bge 2b
-#else
bge 3f
-#endif
lis r4,exc_exit_restart@ha
addi r4,r4,exc_exit_restart@l
cmplw r12,r4
-#ifdef CONFIG_PPC_BOOK3S_601
- blt 2b
-#else
blt 3f
-#endif
lis r3,fee_restarts@ha
tophys(r3,r3)
lwz r5,fee_restarts@l(r3)
@@ -848,7 +832,6 @@ fee_restarts:
/* aargh, a nonrecoverable interrupt, panic */
/* aargh, we don't know which trap this is */
-/* but the 601 doesn't implement the RI bit, so assume it's OK */
3:
li r10,-1
stw r10,_TRAP(r11)
@@ -872,7 +855,6 @@ ret_from_except:
* from the interrupt. */
/* Note: We don't bother telling lockdep about it */
LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
- SYNC /* Some chip revs have problems here... */
mtmsr r10 /* disable interrupts */
lwz r3,_MSR(r1) /* Returning to user mode? */
@@ -1035,7 +1017,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
* exc_exit_restart below. -- paulus
*/
LOAD_REG_IMMEDIATE(r10,MSR_KERNEL & ~MSR_RI)
- SYNC
mtmsr r10 /* clear the RI bit */
.globl exc_exit_restart
exc_exit_restart:
@@ -1046,7 +1027,6 @@ exc_exit_restart:
lwz r1,GPR1(r1)
.globl exc_exit_restart_end
exc_exit_restart_end:
- SYNC
RFI
_ASM_NOKPROBE_SYMBOL(exc_exit_restart)
_ASM_NOKPROBE_SYMBOL(exc_exit_restart_end)
@@ -1274,7 +1254,6 @@ do_resched: /* r10 contains MSR_KERNEL here */
mfmsr r10
#endif
ori r10,r10,MSR_EE
- SYNC
mtmsr r10 /* hard-enable interrupts */
bl schedule
recheck:
@@ -1283,7 +1262,6 @@ recheck:
* TI_FLAGS aren't advertised.
*/
LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
- SYNC
mtmsr r10 /* disable interrupts */
lwz r9,TI_FLAGS(r2)
andi. r0,r9,_TIF_NEED_RESCHED
@@ -1292,7 +1270,6 @@ recheck:
beq restore_user
do_user_signal: /* r10 contains MSR_KERNEL here */
ori r10,r10,MSR_EE
- SYNC
mtmsr r10 /* hard-enable interrupts */
/* save r13-r31 in the exception frame, if not already done */
lwz r3,_TRAP(r1)
@@ -1316,19 +1293,11 @@ nonrecoverable:
lis r10,exc_exit_restart_end@ha
addi r10,r10,exc_exit_restart_end@l
cmplw r12,r10
-#ifdef CONFIG_PPC_BOOK3S_601
- bgelr
-#else
bge 3f
-#endif
lis r11,exc_exit_restart@ha
addi r11,r11,exc_exit_restart@l
cmplw r12,r11
-#ifdef CONFIG_PPC_BOOK3S_601
- bltlr
-#else
blt 3f
-#endif
lis r10,ee_restarts@ha
lwz r12,ee_restarts@l(r10)
addi r12,r12,1
@@ -1336,7 +1305,6 @@ nonrecoverable:
mr r12,r11 /* restart at exc_exit_restart */
blr
3: /* OK, we can't recover, kill this process */
- /* but the 601 doesn't implement the RI bit, so assume it's OK */
lwz r3,_TRAP(r1)
andi. r0,r3,1
beq 5f
@@ -1382,8 +1350,7 @@ _GLOBAL(enter_rtas)
mfmsr r9
stw r9,8(r1)
LOAD_REG_IMMEDIATE(r0,MSR_KERNEL)
- SYNC /* disable interrupts so SRR0/1 */
- mtmsr r0 /* don't get trashed */
+ mtmsr r0 /* disable interrupts so SRR0/1 don't get trashed */
li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR)
mtlr r6
stw r7, THREAD + RTAS_SP(r2)
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 733e40eba4eb..2f3846192ec7 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -430,7 +430,11 @@ _ASM_NOKPROBE_SYMBOL(save_nvgprs);
#define FLUSH_COUNT_CACHE \
1: nop; \
- patch_site 1b, patch__call_flush_branch_caches
+ patch_site 1b, patch__call_flush_branch_caches1; \
+1: nop; \
+ patch_site 1b, patch__call_flush_branch_caches2; \
+1: nop; \
+ patch_site 1b, patch__call_flush_branch_caches3
.macro nops number
.rept \number
@@ -512,7 +516,7 @@ _GLOBAL(_switch)
kuap_check_amr r9, r10
- FLUSH_COUNT_CACHE
+ FLUSH_COUNT_CACHE /* Clobbers r9, ctr */
/*
* On SMP kernels, care must be taken because a task may be
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index d9ed79415100..f579ce46eef2 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -988,7 +988,6 @@ kernel_dbg_exc:
.endm
masked_interrupt_book3e_0x500:
- // XXX When adding support for EPR, use PACA_IRQ_EE_EDGE
masked_interrupt_book3e PACA_IRQ_EE 1
masked_interrupt_book3e_0x900:
@@ -1303,16 +1302,6 @@ fast_exception_return:
addi r3,r1,STACK_FRAME_OVERHEAD;
bl do_IRQ
b ret_from_except
-1: cmpwi cr0,r3,0xf00
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl performance_monitor_exception
- b ret_from_except
-1: cmpwi cr0,r3,0xe60
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl handle_hmi_exception
- b ret_from_except
1: cmpwi cr0,r3,0x900
bne 1f
addi r3,r1,STACK_FRAME_OVERHEAD;
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 5cdf4168a61a..8482739d42f3 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -754,10 +754,8 @@ u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
void fadump_update_elfcore_header(char *bufp)
{
- struct elfhdr *elf;
struct elf_phdr *phdr;
- elf = (struct elfhdr *)bufp;
bufp += sizeof(struct elfhdr);
/* First note is a place holder for cpu notes info. */
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 4ae39db70044..3ff9a8fafa46 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -87,7 +87,6 @@ BEGIN_FTR_SECTION
oris r5,r5,MSR_VSX@h
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
- SYNC
MTMSRD(r5) /* enable use of fpu now */
isync
/* enable use of FP after return */
@@ -134,18 +133,3 @@ _GLOBAL(save_fpu)
mffs fr0
stfd fr0,FPSTATE_FPSCR(r6)
blr
-
-/*
- * These are used in the alignment trap handler when emulating
- * single-precision loads and stores.
- */
-
-_GLOBAL(cvt_fd)
- lfs 0,0(r3)
- stfd 0,0(r4)
- blr
-
-_GLOBAL(cvt_df)
- lfd 0,0(r3)
- stfs 0,0(r4)
- blr
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 9abec6cd099c..7c767765071d 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -40,48 +40,52 @@
.macro EXCEPTION_PROLOG_1 for_rtas=0
#ifdef CONFIG_VMAP_STACK
- .ifeq \for_rtas
- li r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
- mtmsr r11
- isync
- .endif
- subi r11, r1, INT_FRAME_SIZE /* use r1 if kernel */
+ mr r11, r1
+ subi r1, r1, INT_FRAME_SIZE /* use r1 if kernel */
+ beq 1f
+ mfspr r1,SPRN_SPRG_THREAD
+ lwz r1,TASK_STACK-THREAD(r1)
+ addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE
#else
- tophys(r11,r1) /* use tophys(r1) if kernel */
- subi r11, r11, INT_FRAME_SIZE /* alloc exc. frame */
-#endif
+ subi r11, r1, INT_FRAME_SIZE /* use r1 if kernel */
beq 1f
mfspr r11,SPRN_SPRG_THREAD
- tovirt_vmstack r11, r11
lwz r11,TASK_STACK-THREAD(r11)
addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE
- tophys_novmstack r11, r11
+#endif
1:
+ tophys_novmstack r11, r11
#ifdef CONFIG_VMAP_STACK
- mtcrf 0x7f, r11
+ mtcrf 0x7f, r1
bt 32 - THREAD_ALIGN_SHIFT, stack_overflow
#endif
.endm
.macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0
-#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S)
-BEGIN_MMU_FTR_SECTION
+#ifdef CONFIG_VMAP_STACK
mtcr r10
-FTR_SECTION_ELSE
- stw r10, _CCR(r11)
-ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE)
+ li r10, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
+ mtmsr r10
+ isync
#else
stw r10,_CCR(r11) /* save registers */
#endif
mfspr r10, SPRN_SPRG_SCRATCH0
+#ifdef CONFIG_VMAP_STACK
+ stw r11,GPR1(r1)
+ stw r11,0(r1)
+ mr r11, r1
+#else
+ stw r1,GPR1(r11)
+ stw r1,0(r11)
+ tovirt(r1, r11) /* set new kernel sp */
+#endif
stw r12,GPR12(r11)
stw r9,GPR9(r11)
stw r10,GPR10(r11)
-#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S)
-BEGIN_MMU_FTR_SECTION
+#ifdef CONFIG_VMAP_STACK
mfcr r10
stw r10, _CCR(r11)
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
#endif
mfspr r12,SPRN_SPRG_SCRATCH1
stw r12,GPR11(r11)
@@ -97,19 +101,12 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
stw r10, _DSISR(r11)
.endif
lwz r9, SRR1(r12)
-#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S)
-BEGIN_MMU_FTR_SECTION
andi. r10, r9, MSR_PR
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
-#endif
lwz r12, SRR0(r12)
#else
mfspr r12,SPRN_SRR0
mfspr r9,SPRN_SRR1
#endif
- stw r1,GPR1(r11)
- stw r1,0(r11)
- tovirt_novmstack r1, r11 /* set new kernel sp */
#ifdef CONFIG_40x
rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
#else
@@ -225,7 +222,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
#endif
mtspr SPRN_SRR1,r10
mtspr SPRN_SRR0,r11
- SYNC
RFI /* jump to handler, enable MMU */
99: b ret_from_kernel_syscall
.endm
@@ -327,20 +323,19 @@ label:
.macro vmap_stack_overflow_exception
#ifdef CONFIG_VMAP_STACK
#ifdef CONFIG_SMP
- mfspr r11, SPRN_SPRG_THREAD
- tovirt(r11, r11)
- lwz r11, TASK_CPU - THREAD(r11)
- slwi r11, r11, 3
- addis r11, r11, emergency_ctx@ha
+ mfspr r1, SPRN_SPRG_THREAD
+ lwz r1, TASK_CPU - THREAD(r1)
+ slwi r1, r1, 3
+ addis r1, r1, emergency_ctx@ha
#else
- lis r11, emergency_ctx@ha
+ lis r1, emergency_ctx@ha
#endif
- lwz r11, emergency_ctx@l(r11)
- cmpwi cr1, r11, 0
+ lwz r1, emergency_ctx@l(r1)
+ cmpwi cr1, r1, 0
bne cr1, 1f
- lis r11, init_thread_union@ha
- addi r11, r11, init_thread_union@l
-1: addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE
+ lis r1, init_thread_union@ha
+ addi r1, r1, init_thread_union@l
+1: addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE
EXCEPTION_PROLOG_2
SAVE_NVGPRS(r11)
addi r3, r1, STACK_FRAME_OVERHEAD
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 5b282d9965a5..44c9018aed1b 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -72,7 +72,6 @@ turn_on_mmu:
lis r0,start_here@h
ori r0,r0,start_here@l
mtspr SPRN_SRR0,r0
- SYNC
rfi /* enables MMU */
b . /* prevent prefetch past rfi */
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 0e05a9a47a4b..1510b2a56669 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -300,9 +300,6 @@ _GLOBAL(fsl_secondary_thread_init)
rlwimi r3, r3, 30, 2, 30
mtspr SPRN_PIR, r3
1:
-#endif
-
-_GLOBAL(generic_secondary_thread_init)
mr r24,r3
/* turn on 64-bit mode */
@@ -312,13 +309,13 @@ _GLOBAL(generic_secondary_thread_init)
bl relative_toc
tovirt(r2,r2)
-#ifdef CONFIG_PPC_BOOK3E
/* Book3E initialization */
mr r3,r24
bl book3e_secondary_thread_init
-#endif
b generic_secondary_common_init
+#endif /* CONFIG_PPC_BOOK3E */
+
/*
* On pSeries and most other platforms, secondary processors spin
* in the following code.
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_book3s_32.S
index f3ab94d73936..5eb9eedac920 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -34,16 +34,6 @@
#include "head_32.h"
-/* 601 only have IBAT */
-#ifdef CONFIG_PPC_BOOK3S_601
-#define LOAD_BAT(n, reg, RA, RB) \
- li RA,0; \
- mtspr SPRN_IBAT##n##U,RA; \
- lwz RA,(n*16)+0(reg); \
- lwz RB,(n*16)+4(reg); \
- mtspr SPRN_IBAT##n##U,RA; \
- mtspr SPRN_IBAT##n##L,RB
-#else
#define LOAD_BAT(n, reg, RA, RB) \
/* see the comment for clear_bats() -- Cort */ \
li RA,0; \
@@ -57,11 +47,10 @@
lwz RB,(n*16)+12(reg); \
mtspr SPRN_DBAT##n##U,RA; \
mtspr SPRN_DBAT##n##L,RB
-#endif
__HEAD
.stabs "arch/powerpc/kernel/",N_SO,0,0,0f
- .stabs "head_32.S",N_SO,0,0,0f
+ .stabs "head_book3s_32.S",N_SO,0,0,0f
0:
_ENTRY(_stext);
@@ -166,9 +155,9 @@ __after_mmu_off:
bl initial_bats
bl load_segment_registers
-#ifdef CONFIG_KASAN
+BEGIN_MMU_FTR_SECTION
bl early_hash_table
-#endif
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
#if defined(CONFIG_BOOTX_TEXT)
bl setup_disp_bat
#endif
@@ -185,10 +174,8 @@ __after_mmu_off:
bl reloc_offset
li r24,0 /* cpu# */
bl call_setup_cpu /* Call setup_cpu for this CPU */
-#ifdef CONFIG_PPC_BOOK3S_32
bl reloc_offset
bl init_idle_6xx
-#endif /* CONFIG_PPC_BOOK3S_32 */
/*
@@ -219,7 +206,6 @@ turn_on_mmu:
lis r0,start_here@h
ori r0,r0,start_here@l
mtspr SPRN_SRR0,r0
- SYNC
RFI /* enables MMU */
/*
@@ -274,14 +260,8 @@ __secondary_hold_acknowledge:
DO_KVM 0x200
MachineCheck:
EXCEPTION_PROLOG_0
-#ifdef CONFIG_VMAP_STACK
- li r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
- mtmsr r11
- isync
-#endif
#ifdef CONFIG_PPC_CHRP
mfspr r11, SPRN_SPRG_THREAD
- tovirt_vmstack r11, r11
lwz r11, RTAS_SP(r11)
cmpwi cr1, r11, 0
bne cr1, 7f
@@ -439,7 +419,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE)
SystemCall:
SYSCALL_ENTRY 0xc00
-/* Single step - not used on 601 */
EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD)
@@ -790,14 +769,12 @@ fast_hash_page_return:
mtcr r11
lwz r11, THR11(r10)
mfspr r10, SPRN_SPRG_SCRATCH0
- SYNC
RFI
1: /* ISI */
mtcr r11
mfspr r11, SPRN_SPRG_SCRATCH1
mfspr r10, SPRN_SPRG_SCRATCH0
- SYNC
RFI
stack_overflow:
@@ -888,7 +865,6 @@ __secondary_start_pmac_0:
set to map the 0xf0000000 - 0xffffffff region */
mfmsr r0
rlwinm r0,r0,0,28,26 /* clear DR (0x10) */
- SYNC
mtmsr r0
isync
@@ -900,10 +876,8 @@ __secondary_start:
lis r3,-KERNELBASE@h
mr r4,r24
bl call_setup_cpu /* Call setup_cpu for this CPU */
-#ifdef CONFIG_PPC_BOOK3S_32
lis r3,-KERNELBASE@h
bl init_idle_6xx
-#endif /* CONFIG_PPC_BOOK3S_32 */
/* get current's stack and current */
lis r2,secondary_current@ha
@@ -936,7 +910,6 @@ __secondary_start:
ori r3,r3,start_secondary@l
mtspr SPRN_SRR0,r3
mtspr SPRN_SRR1,r4
- SYNC
RFI
#endif /* CONFIG_SMP */
@@ -945,21 +918,9 @@ __secondary_start:
#endif
/*
- * Those generic dummy functions are kept for CPUs not
- * included in CONFIG_PPC_BOOK3S_32
- */
-#if !defined(CONFIG_PPC_BOOK3S_32)
-_ENTRY(__save_cpu_setup)
- blr
-_ENTRY(__restore_cpu_setup)
- blr
-#endif /* !defined(CONFIG_PPC_BOOK3S_32) */
-
-/*
* Load stuff into the MMU. Intended to be called with
* IR=0 and DR=0.
*/
-#ifdef CONFIG_KASAN
early_hash_table:
sync /* Force all PTE updates to finish */
isync
@@ -970,8 +931,10 @@ early_hash_table:
lis r6, early_hash - PAGE_OFFSET@h
ori r6, r6, 3 /* 256kB table */
mtspr SPRN_SDR1, r6
+ lis r6, early_hash@h
+ lis r3, Hash@ha
+ stw r6, Hash@l(r3)
blr
-#endif
load_up_mmu:
sync /* Force all PTE updates to finish */
@@ -985,8 +948,7 @@ load_up_mmu:
lwz r6,_SDR1@l(r6)
mtspr SPRN_SDR1,r6
-/* Load the BAT registers with the values set up by MMU_init.
- MMU_init takes care of whether we're on a 601 or not. */
+/* Load the BAT registers with the values set up by MMU_init. */
lis r3,BATS@ha
addi r3,r3,BATS@l
tophys(r3,r3)
@@ -1002,7 +964,7 @@ BEGIN_MMU_FTR_SECTION
END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
blr
-load_segment_registers:
+_GLOBAL(load_segment_registers)
li r0, NUM_USER_SEGMENTS /* load up user segment register values */
mtctr r0 /* for context 0 */
li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */
@@ -1061,11 +1023,7 @@ start_here:
bl machine_init
bl __save_cpu_setup
bl MMU_init
-#ifdef CONFIG_KASAN
-BEGIN_MMU_FTR_SECTION
bl MMU_init_hw_patch
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
-#endif
/*
* Go back to running unmapped so we can load up new values
@@ -1080,7 +1038,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
.align 4
mtspr SPRN_SRR0,r4
mtspr SPRN_SRR1,r3
- SYNC
RFI
/* Load up the kernel context */
2: bl load_up_mmu
@@ -1092,7 +1049,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
*/
lis r5, abatron_pteptrs@h
ori r5, r5, abatron_pteptrs@l
- stw r5, 0xf0(r0) /* This much match your Abatron config */
+ stw r5, 0xf0(0) /* This much match your Abatron config */
lis r6, swapper_pg_dir@h
ori r6, r6, swapper_pg_dir@l
tophys(r5, r5)
@@ -1105,7 +1062,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
ori r3,r3,start_kernel@l
mtspr SPRN_SRR0,r3
mtspr SPRN_SRR1,r4
- SYNC
RFI
/*
@@ -1165,7 +1121,6 @@ EXPORT_SYMBOL(switch_mmu_context)
clear_bats:
li r10,0
-#ifndef CONFIG_PPC_BOOK3S_601
mtspr SPRN_DBAT0U,r10
mtspr SPRN_DBAT0L,r10
mtspr SPRN_DBAT1U,r10
@@ -1174,7 +1129,6 @@ clear_bats:
mtspr SPRN_DBAT2L,r10
mtspr SPRN_DBAT3U,r10
mtspr SPRN_DBAT3L,r10
-#endif
mtspr SPRN_IBAT0U,r10
mtspr SPRN_IBAT0L,r10
mtspr SPRN_IBAT1U,r10
@@ -1223,7 +1177,6 @@ _ENTRY(update_bats)
.align 4
mtspr SPRN_SRR0, r4
mtspr SPRN_SRR1, r3
- SYNC
RFI
1: bl clear_bats
lis r3, BATS@ha
@@ -1243,7 +1196,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
mtmsr r3
mtspr SPRN_SRR0, r7
mtspr SPRN_SRR1, r6
- SYNC
RFI
flush_tlbs:
@@ -1267,26 +1219,9 @@ mmu_off:
sync
RFI
-/*
- * On 601, we use 3 BATs to map up to 24M of RAM at _PAGE_OFFSET
- * (we keep one for debugging) and on others, we use one 256M BAT.
- */
+/* We use one BAT to map up to 256M of RAM at _PAGE_OFFSET */
initial_bats:
lis r11,PAGE_OFFSET@h
-#ifdef CONFIG_PPC_BOOK3S_601
- ori r11,r11,4 /* set up BAT registers for 601 */
- li r8,0x7f /* valid, block length = 8MB */
- mtspr SPRN_IBAT0U,r11 /* N.B. 601 has valid bit in */
- mtspr SPRN_IBAT0L,r8 /* lower BAT register */
- addis r11,r11,0x800000@h
- addis r8,r8,0x800000@h
- mtspr SPRN_IBAT1U,r11
- mtspr SPRN_IBAT1L,r8
- addis r11,r11,0x800000@h
- addis r8,r8,0x800000@h
- mtspr SPRN_IBAT2U,r11
- mtspr SPRN_IBAT2L,r8
-#else
tophys(r8,r11)
#ifdef CONFIG_SMP
ori r8,r8,0x12 /* R/W access, M=1 */
@@ -1295,11 +1230,10 @@ initial_bats:
#endif /* CONFIG_SMP */
ori r11,r11,BL_256M<<2|0x2 /* set up BAT registers for 604 */
- mtspr SPRN_DBAT0L,r8 /* N.B. 6xx (not 601) have valid */
+ mtspr SPRN_DBAT0L,r8 /* N.B. 6xx have valid */
mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */
mtspr SPRN_IBAT0L,r8
mtspr SPRN_IBAT0U,r11
-#endif
isync
blr
@@ -1317,13 +1251,8 @@ setup_disp_bat:
beqlr
lwz r11,0(r8)
lwz r8,4(r8)
-#ifndef CONFIG_PPC_BOOK3S_601
mtspr SPRN_DBAT3L,r8
mtspr SPRN_DBAT3U,r11
-#else
- mtspr SPRN_IBAT3L,r8
- mtspr SPRN_IBAT3U,r11
-#endif
blr
#endif /* CONFIG_BOOTX_TEXT */
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 18f87bf9e32b..71c359d438b5 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -176,7 +176,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
#endif
mtspr SPRN_SRR1,r10
mtspr SPRN_SRR0,r11
- SYNC
RFI /* jump to handler, enable MMU */
99: b ret_from_kernel_syscall
.endm
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 1f4a1efa0074..f4e8f21046f5 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -494,151 +494,6 @@ reset:
}
}
-static bool dar_in_user_range(unsigned long dar, struct arch_hw_breakpoint *info)
-{
- return ((info->address <= dar) && (dar - info->address < info->len));
-}
-
-static bool ea_user_range_overlaps(unsigned long ea, int size,
- struct arch_hw_breakpoint *info)
-{
- return ((ea < info->address + info->len) &&
- (ea + size > info->address));
-}
-
-static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info)
-{
- unsigned long hw_start_addr, hw_end_addr;
-
- hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE);
- hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE);
-
- return ((hw_start_addr <= dar) && (hw_end_addr > dar));
-}
-
-static bool ea_hw_range_overlaps(unsigned long ea, int size,
- struct arch_hw_breakpoint *info)
-{
- unsigned long hw_start_addr, hw_end_addr;
-
- hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE);
- hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE);
-
- return ((ea < hw_end_addr) && (ea + size > hw_start_addr));
-}
-
-/*
- * If hw has multiple DAWR registers, we also need to check all
- * dawrx constraint bits to confirm this is _really_ a valid event.
- * If type is UNKNOWN, but privilege level matches, consider it as
- * a positive match.
- */
-static bool check_dawrx_constraints(struct pt_regs *regs, int type,
- struct arch_hw_breakpoint *info)
-{
- if (OP_IS_LOAD(type) && !(info->type & HW_BRK_TYPE_READ))
- return false;
-
- /*
- * The Cache Management instructions other than dcbz never
- * cause a match. i.e. if type is CACHEOP, the instruction
- * is dcbz, and dcbz is treated as Store.
- */
- if ((OP_IS_STORE(type) || type == CACHEOP) && !(info->type & HW_BRK_TYPE_WRITE))
- return false;
-
- if (is_kernel_addr(regs->nip) && !(info->type & HW_BRK_TYPE_KERNEL))
- return false;
-
- if (user_mode(regs) && !(info->type & HW_BRK_TYPE_USER))
- return false;
-
- return true;
-}
-
-/*
- * Return true if the event is valid wrt dawr configuration,
- * including extraneous exception. Otherwise return false.
- */
-static bool check_constraints(struct pt_regs *regs, struct ppc_inst instr,
- unsigned long ea, int type, int size,
- struct arch_hw_breakpoint *info)
-{
- bool in_user_range = dar_in_user_range(regs->dar, info);
- bool dawrx_constraints;
-
- /*
- * 8xx supports only one breakpoint and thus we can
- * unconditionally return true.
- */
- if (IS_ENABLED(CONFIG_PPC_8xx)) {
- if (!in_user_range)
- info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
- return true;
- }
-
- if (unlikely(ppc_inst_equal(instr, ppc_inst(0)))) {
- if (cpu_has_feature(CPU_FTR_ARCH_31) &&
- !dar_in_hw_range(regs->dar, info))
- return false;
-
- return true;
- }
-
- dawrx_constraints = check_dawrx_constraints(regs, type, info);
-
- if (type == UNKNOWN) {
- if (cpu_has_feature(CPU_FTR_ARCH_31) &&
- !dar_in_hw_range(regs->dar, info))
- return false;
-
- return dawrx_constraints;
- }
-
- if (ea_user_range_overlaps(ea, size, info))
- return dawrx_constraints;
-
- if (ea_hw_range_overlaps(ea, size, info)) {
- if (dawrx_constraints) {
- info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
- return true;
- }
- }
- return false;
-}
-
-static int cache_op_size(void)
-{
-#ifdef __powerpc64__
- return ppc64_caches.l1d.block_size;
-#else
- return L1_CACHE_BYTES;
-#endif
-}
-
-static void get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr,
- int *type, int *size, unsigned long *ea)
-{
- struct instruction_op op;
-
- if (__get_user_instr_inatomic(*instr, (void __user *)regs->nip))
- return;
-
- analyse_instr(&op, regs, *instr);
- *type = GETTYPE(op.type);
- *ea = op.ea;
-#ifdef __powerpc64__
- if (!(regs->msr & MSR_64BIT))
- *ea &= 0xffffffffUL;
-#endif
-
- *size = GETSIZE(op.type);
- if (*type == CACHEOP) {
- *size = cache_op_size();
- *ea &= ~(*size - 1);
- }
-}
-
static bool is_larx_stcx_instr(int type)
{
return type == LARX || type == STCX;
@@ -722,7 +577,7 @@ int hw_breakpoint_handler(struct die_args *args)
rcu_read_lock();
if (!IS_ENABLED(CONFIG_PPC_8xx))
- get_instr_detail(regs, &instr, &type, &size, &ea);
+ wp_get_instr_detail(regs, &instr, &type, &size, &ea);
for (i = 0; i < nr_wp_slots(); i++) {
bp[i] = __this_cpu_read(bp_per_reg[i]);
@@ -732,7 +587,7 @@ int hw_breakpoint_handler(struct die_args *args)
info[i] = counter_arch_bp(bp[i]);
info[i]->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
- if (check_constraints(regs, instr, ea, type, size, info[i])) {
+ if (wp_check_constraints(regs, instr, ea, type, size, info[i])) {
if (!IS_ENABLED(CONFIG_PPC_8xx) &&
ppc_inst_equal(instr, ppc_inst(0))) {
handler_error(bp[i], info[i]);
diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c
new file mode 100644
index 000000000000..867ee4aa026a
--- /dev/null
+++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <linux/kernel.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+#include <asm/hw_breakpoint.h>
+#include <asm/sstep.h>
+#include <asm/cache.h>
+
+static bool dar_in_user_range(unsigned long dar, struct arch_hw_breakpoint *info)
+{
+ return ((info->address <= dar) && (dar - info->address < info->len));
+}
+
+static bool ea_user_range_overlaps(unsigned long ea, int size,
+ struct arch_hw_breakpoint *info)
+{
+ return ((ea < info->address + info->len) &&
+ (ea + size > info->address));
+}
+
+static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info)
+{
+ unsigned long hw_start_addr, hw_end_addr;
+
+ hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE);
+ hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE);
+
+ return ((hw_start_addr <= dar) && (hw_end_addr > dar));
+}
+
+static bool ea_hw_range_overlaps(unsigned long ea, int size,
+ struct arch_hw_breakpoint *info)
+{
+ unsigned long hw_start_addr, hw_end_addr;
+ unsigned long align_size = HW_BREAKPOINT_SIZE;
+
+ /*
+ * On p10 predecessors, quadword is handle differently then
+ * other instructions.
+ */
+ if (!cpu_has_feature(CPU_FTR_ARCH_31) && size == 16)
+ align_size = HW_BREAKPOINT_SIZE_QUADWORD;
+
+ hw_start_addr = ALIGN_DOWN(info->address, align_size);
+ hw_end_addr = ALIGN(info->address + info->len, align_size);
+
+ return ((ea < hw_end_addr) && (ea + size > hw_start_addr));
+}
+
+/*
+ * If hw has multiple DAWR registers, we also need to check all
+ * dawrx constraint bits to confirm this is _really_ a valid event.
+ * If type is UNKNOWN, but privilege level matches, consider it as
+ * a positive match.
+ */
+static bool check_dawrx_constraints(struct pt_regs *regs, int type,
+ struct arch_hw_breakpoint *info)
+{
+ if (OP_IS_LOAD(type) && !(info->type & HW_BRK_TYPE_READ))
+ return false;
+
+ /*
+ * The Cache Management instructions other than dcbz never
+ * cause a match. i.e. if type is CACHEOP, the instruction
+ * is dcbz, and dcbz is treated as Store.
+ */
+ if ((OP_IS_STORE(type) || type == CACHEOP) && !(info->type & HW_BRK_TYPE_WRITE))
+ return false;
+
+ if (is_kernel_addr(regs->nip) && !(info->type & HW_BRK_TYPE_KERNEL))
+ return false;
+
+ if (user_mode(regs) && !(info->type & HW_BRK_TYPE_USER))
+ return false;
+
+ return true;
+}
+
+/*
+ * Return true if the event is valid wrt dawr configuration,
+ * including extraneous exception. Otherwise return false.
+ */
+bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr,
+ unsigned long ea, int type, int size,
+ struct arch_hw_breakpoint *info)
+{
+ bool in_user_range = dar_in_user_range(regs->dar, info);
+ bool dawrx_constraints;
+
+ /*
+ * 8xx supports only one breakpoint and thus we can
+ * unconditionally return true.
+ */
+ if (IS_ENABLED(CONFIG_PPC_8xx)) {
+ if (!in_user_range)
+ info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+ return true;
+ }
+
+ if (unlikely(ppc_inst_equal(instr, ppc_inst(0)))) {
+ if (cpu_has_feature(CPU_FTR_ARCH_31) &&
+ !dar_in_hw_range(regs->dar, info))
+ return false;
+
+ return true;
+ }
+
+ dawrx_constraints = check_dawrx_constraints(regs, type, info);
+
+ if (type == UNKNOWN) {
+ if (cpu_has_feature(CPU_FTR_ARCH_31) &&
+ !dar_in_hw_range(regs->dar, info))
+ return false;
+
+ return dawrx_constraints;
+ }
+
+ if (ea_user_range_overlaps(ea, size, info))
+ return dawrx_constraints;
+
+ if (ea_hw_range_overlaps(ea, size, info)) {
+ if (dawrx_constraints) {
+ info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+ return true;
+ }
+ }
+ return false;
+}
+
+static int cache_op_size(void)
+{
+#ifdef __powerpc64__
+ return ppc64_caches.l1d.block_size;
+#else
+ return L1_CACHE_BYTES;
+#endif
+}
+
+void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr,
+ int *type, int *size, unsigned long *ea)
+{
+ struct instruction_op op;
+
+ if (__get_user_instr_inatomic(*instr, (void __user *)regs->nip))
+ return;
+
+ analyse_instr(&op, regs, *instr);
+ *type = GETTYPE(op.type);
+ *ea = op.ea;
+#ifdef __powerpc64__
+ if (!(regs->msr & MSR_64BIT))
+ *ea &= 0xffffffffUL;
+#endif
+
+ *size = GETSIZE(op.type);
+ if (*type == CACHEOP) {
+ *size = cache_op_size();
+ *ea &= ~(*size - 1);
+ } else if (*type == LOAD_VMX || *type == STORE_VMX) {
+ *ea &= ~(*size - 1);
+ }
+}
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 422e31d2f5a2..ae0e2632393d 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -41,14 +41,6 @@ static int __init powersave_off(char *arg)
}
__setup("powersave=off", powersave_off);
-#ifdef CONFIG_HOTPLUG_CPU
-void arch_cpu_idle_dead(void)
-{
- sched_preempt_enable_no_resched();
- cpu_die();
-}
-#endif
-
void arch_cpu_idle(void)
{
ppc64_runlatch_off();
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index bf21ebd36190..7d0f7682d01d 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -104,7 +104,7 @@ static inline notrace unsigned long get_irq_happened(void)
static inline notrace int decrementer_check_overflow(void)
{
- u64 now = get_tb_or_rtc();
+ u64 now = get_tb();
u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
return now >= *next_tb;
@@ -113,7 +113,7 @@ static inline notrace int decrementer_check_overflow(void)
#ifdef CONFIG_PPC_BOOK3E
/* This is called whenever we are re-enabling interrupts
- * and returns either 0 (nothing to do) or 500/900/280/a00/e80 if
+ * and returns either 0 (nothing to do) or 500/900/280 if
* there's an EE, DEC or DBELL to generate.
*
* This is called in two contexts: From arch_local_irq_restore()
@@ -181,16 +181,6 @@ notrace unsigned int __check_irq_replay(void)
return 0x500;
}
- /*
- * Check if an EPR external interrupt happened this bit is typically
- * set if we need to handle another "edge" interrupt from within the
- * MPIC "EPR" handler.
- */
- if (happened & PACA_IRQ_EE_EDGE) {
- local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE;
- return 0x500;
- }
-
if (happened & PACA_IRQ_DBELL) {
local_paca->irq_happened &= ~PACA_IRQ_DBELL;
return 0x280;
@@ -201,6 +191,25 @@ notrace unsigned int __check_irq_replay(void)
return 0;
}
+
+/*
+ * This is specifically called by assembly code to re-enable interrupts
+ * if they are currently disabled. This is typically called before
+ * schedule() or do_signal() when returning to userspace. We do it
+ * in C to avoid the burden of dealing with lockdep etc...
+ *
+ * NOTE: This is called with interrupts hard disabled but not marked
+ * as such in paca->irq_happened, so we need to resync this.
+ */
+void notrace restore_interrupts(void)
+{
+ if (irqs_disabled()) {
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ local_irq_enable();
+ } else
+ __hard_irq_enable();
+}
+
#endif /* CONFIG_PPC_BOOK3E */
void replay_soft_interrupts(void)
@@ -214,7 +223,7 @@ void replay_soft_interrupts(void)
struct pt_regs regs;
ppc_save_regs(&regs);
- regs.softe = IRQS_ALL_DISABLED;
+ regs.softe = IRQS_ENABLED;
again:
if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
@@ -270,19 +279,6 @@ again:
hard_irq_disable();
}
- /*
- * Check if an EPR external interrupt happened this bit is typically
- * set if we need to handle another "edge" interrupt from within the
- * MPIC "EPR" handler.
- */
- if (IS_ENABLED(CONFIG_PPC_BOOK3E) && (happened & PACA_IRQ_EE_EDGE)) {
- local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE;
- regs.trap = 0x500;
- do_IRQ(&regs);
- if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
- hard_irq_disable();
- }
-
if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (happened & PACA_IRQ_DBELL)) {
local_paca->irq_happened &= ~PACA_IRQ_DBELL;
if (IS_ENABLED(CONFIG_PPC_BOOK3E))
@@ -368,6 +364,12 @@ notrace void arch_local_irq_restore(unsigned long mask)
}
}
+ /*
+ * Disable preempt here, so that the below preempt_enable will
+ * perform resched if required (a replayed interrupt may set
+ * need_resched).
+ */
+ preempt_disable();
irq_soft_mask_set(IRQS_ALL_DISABLED);
trace_hardirqs_off();
@@ -377,28 +379,11 @@ notrace void arch_local_irq_restore(unsigned long mask)
trace_hardirqs_on();
irq_soft_mask_set(IRQS_ENABLED);
__hard_irq_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(arch_local_irq_restore);
/*
- * This is specifically called by assembly code to re-enable interrupts
- * if they are currently disabled. This is typically called before
- * schedule() or do_signal() when returning to userspace. We do it
- * in C to avoid the burden of dealing with lockdep etc...
- *
- * NOTE: This is called with interrupts hard disabled but not marked
- * as such in paca->irq_happened, so we need to resync this.
- */
-void notrace restore_interrupts(void)
-{
- if (irqs_disabled()) {
- local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
- local_irq_enable();
- } else
- __hard_irq_enable();
-}
-
-/*
* This is a helper to use when about to go into idle low-power
* when the latter has the side effect of re-enabling interrupts
* (such as calling H_CEDE under pHyp).
diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S
index 5f07aa5e9851..225511d73bef 100644
--- a/arch/powerpc/kernel/l2cr_6xx.S
+++ b/arch/powerpc/kernel/l2cr_6xx.S
@@ -256,7 +256,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
sync
/* Restore MSR (restores EE and DR bits to original state) */
- SYNC
mtmsr r7
isync
@@ -377,7 +376,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR)
1: bdnz 1b
/* Restore MSR (restores EE and DR bits to original state) */
-4: SYNC
+4:
mtmsr r7
isync
blr
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index b24f866fef81..717e658b90fd 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -215,19 +215,6 @@ _GLOBAL(low_choose_7447a_dfs)
#endif /* CONFIG_CPU_FREQ_PMAC && CONFIG_PPC_BOOK3S_32 */
-/*
- * complement mask on the msr then "or" some values on.
- * _nmask_and_or_msr(nmask, value_to_or)
- */
-_GLOBAL(_nmask_and_or_msr)
- mfmsr r0 /* Get current msr */
- andc r0,r0,r3 /* And off the bits set in r3 (first parm) */
- or r0,r0,r4 /* Or on the bits in r4 (second parm) */
- SYNC /* Some chip revs have problems here... */
- mtmsr r0 /* Update machine state */
- isync
- blr /* Done */
-
#ifdef CONFIG_40x
/*
@@ -268,41 +255,6 @@ _ASM_NOKPROBE_SYMBOL(real_writeb)
#endif /* CONFIG_40x */
-
-/*
- * Flush instruction cache.
- * This is a no-op on the 601.
- */
-#ifndef CONFIG_PPC_8xx
-_GLOBAL(flush_instruction_cache)
-#if defined(CONFIG_4xx)
- lis r3, KERNELBASE@h
- iccci 0,r3
-#elif defined(CONFIG_FSL_BOOKE)
-#ifdef CONFIG_E200
- mfspr r3,SPRN_L1CSR0
- ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC
- /* msync; isync recommended here */
- mtspr SPRN_L1CSR0,r3
- isync
- blr
-#endif
- mfspr r3,SPRN_L1CSR1
- ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
- mtspr SPRN_L1CSR1,r3
-#elif defined(CONFIG_PPC_BOOK3S_601)
- blr /* for 601, do nothing */
-#else
- /* 603/604 processor - use invalidate-all bit in HID0 */
- mfspr r3,SPRN_HID0
- ori r3,r3,HID0_ICFI
- mtspr SPRN_HID0,r3
-#endif /* CONFIG_4xx */
- isync
- blr
-EXPORT_SYMBOL(flush_instruction_cache)
-#endif /* CONFIG_PPC_8xx */
-
/*
* Copy a whole page. We use the dcbz instruction on the destination
* to reduce memory traffic (it eliminates the unnecessary reads of
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 7bb46ad98207..070465825c21 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -365,7 +365,6 @@ _GLOBAL(kexec_smp_wait)
li r4,KEXEC_STATE_REAL_MODE
stb r4,PACAKEXECSTATE(r13)
- SYNC
b kexec_wait
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 73a57043ee66..d421a2c7f822 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -124,10 +124,8 @@ unsigned long notrace msr_check_and_set(unsigned long bits)
newmsr = oldmsr | bits;
-#ifdef CONFIG_VSX
if (cpu_has_feature(CPU_FTR_VSX) && (bits & MSR_FP))
newmsr |= MSR_VSX;
-#endif
if (oldmsr != newmsr)
mtmsr_isync(newmsr);
@@ -144,10 +142,8 @@ void notrace __msr_check_and_clear(unsigned long bits)
newmsr = oldmsr & ~bits;
-#ifdef CONFIG_VSX
if (cpu_has_feature(CPU_FTR_VSX) && (bits & MSR_FP))
newmsr &= ~MSR_VSX;
-#endif
if (oldmsr != newmsr)
mtmsr_isync(newmsr);
@@ -162,10 +158,8 @@ static void __giveup_fpu(struct task_struct *tsk)
save_fpu(tsk);
msr = tsk->thread.regs->msr;
msr &= ~(MSR_FP|MSR_FE0|MSR_FE1);
-#ifdef CONFIG_VSX
if (cpu_has_feature(CPU_FTR_VSX))
msr &= ~MSR_VSX;
-#endif
tsk->thread.regs->msr = msr;
}
@@ -235,6 +229,8 @@ void enable_kernel_fp(void)
}
}
EXPORT_SYMBOL(enable_kernel_fp);
+#else
+static inline void __giveup_fpu(struct task_struct *tsk) { }
#endif /* CONFIG_PPC_FPU */
#ifdef CONFIG_ALTIVEC
@@ -245,10 +241,8 @@ static void __giveup_altivec(struct task_struct *tsk)
save_altivec(tsk);
msr = tsk->thread.regs->msr;
msr &= ~MSR_VEC;
-#ifdef CONFIG_VSX
if (cpu_has_feature(CPU_FTR_VSX))
msr &= ~MSR_VSX;
-#endif
tsk->thread.regs->msr = msr;
}
@@ -414,21 +408,14 @@ static unsigned long msr_all_available;
static int __init init_msr_all_available(void)
{
-#ifdef CONFIG_PPC_FPU
- msr_all_available |= MSR_FP;
-#endif
-#ifdef CONFIG_ALTIVEC
+ if (IS_ENABLED(CONFIG_PPC_FPU))
+ msr_all_available |= MSR_FP;
if (cpu_has_feature(CPU_FTR_ALTIVEC))
msr_all_available |= MSR_VEC;
-#endif
-#ifdef CONFIG_VSX
if (cpu_has_feature(CPU_FTR_VSX))
msr_all_available |= MSR_VSX;
-#endif
-#ifdef CONFIG_SPE
if (cpu_has_feature(CPU_FTR_SPE))
msr_all_available |= MSR_SPE;
-#endif
return 0;
}
@@ -452,18 +439,12 @@ void giveup_all(struct task_struct *tsk)
WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC)));
-#ifdef CONFIG_PPC_FPU
if (usermsr & MSR_FP)
__giveup_fpu(tsk);
-#endif
-#ifdef CONFIG_ALTIVEC
if (usermsr & MSR_VEC)
__giveup_altivec(tsk);
-#endif
-#ifdef CONFIG_SPE
if (usermsr & MSR_SPE)
__giveup_spe(tsk);
-#endif
msr_check_and_clear(msr_all_available);
}
@@ -509,19 +490,18 @@ static bool should_restore_altivec(void) { return false; }
static void do_restore_altivec(void) { }
#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_VSX
static bool should_restore_vsx(void)
{
if (cpu_has_feature(CPU_FTR_VSX))
return true;
return false;
}
+#ifdef CONFIG_VSX
static void do_restore_vsx(void)
{
current->thread.used_vsr = 1;
}
#else
-static bool should_restore_vsx(void) { return false; }
static void do_restore_vsx(void) { }
#endif /* CONFIG_VSX */
@@ -581,7 +561,7 @@ void notrace restore_math(struct pt_regs *regs)
regs->msr |= new_msr | fpexc_mode;
}
}
-#endif
+#endif /* CONFIG_PPC_BOOK3S_64 */
static void save_all(struct task_struct *tsk)
{
@@ -642,6 +622,44 @@ void do_send_trap(struct pt_regs *regs, unsigned long address,
(void __user *)address);
}
#else /* !CONFIG_PPC_ADV_DEBUG_REGS */
+
+static void do_break_handler(struct pt_regs *regs)
+{
+ struct arch_hw_breakpoint null_brk = {0};
+ struct arch_hw_breakpoint *info;
+ struct ppc_inst instr = ppc_inst(0);
+ int type = 0;
+ int size = 0;
+ unsigned long ea;
+ int i;
+
+ /*
+ * If underneath hw supports only one watchpoint, we know it
+ * caused exception. 8xx also falls into this category.
+ */
+ if (nr_wp_slots() == 1) {
+ __set_breakpoint(0, &null_brk);
+ current->thread.hw_brk[0] = null_brk;
+ current->thread.hw_brk[0].flags |= HW_BRK_FLAG_DISABLED;
+ return;
+ }
+
+ /* Otherwise findout which DAWR caused exception and disable it. */
+ wp_get_instr_detail(regs, &instr, &type, &size, &ea);
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ info = &current->thread.hw_brk[i];
+ if (!info->address)
+ continue;
+
+ if (wp_check_constraints(regs, instr, ea, type, size, info)) {
+ __set_breakpoint(i, &null_brk);
+ current->thread.hw_brk[i] = null_brk;
+ current->thread.hw_brk[i].flags |= HW_BRK_FLAG_DISABLED;
+ }
+ }
+}
+
void do_break (struct pt_regs *regs, unsigned long address,
unsigned long error_code)
{
@@ -653,6 +671,16 @@ void do_break (struct pt_regs *regs, unsigned long address,
if (debugger_break_match(regs))
return;
+ /*
+ * We reach here only when watchpoint exception is generated by ptrace
+ * event (or hw is buggy!). Now if CONFIG_HAVE_HW_BREAKPOINT is set,
+ * watchpoint is already handled by hw_breakpoint_handler() so we don't
+ * have to do anything. But when CONFIG_HAVE_HW_BREAKPOINT is not set,
+ * we need to manually handle the watchpoint here.
+ */
+ if (!IS_ENABLED(CONFIG_HAVE_HW_BREAKPOINT))
+ do_break_handler(regs);
+
/* Deliver the signal to userspace */
force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)address);
}
@@ -783,9 +811,8 @@ static void switch_hw_breakpoint(struct task_struct *new)
static inline int __set_dabr(unsigned long dabr, unsigned long dabrx)
{
mtspr(SPRN_DAC1, dabr);
-#ifdef CONFIG_PPC_47x
- isync();
-#endif
+ if (IS_ENABLED(CONFIG_PPC_47x))
+ isync();
return 0;
}
#elif defined(CONFIG_PPC_BOOK3S)
@@ -1256,15 +1283,17 @@ struct task_struct *__switch_to(struct task_struct *prev,
restore_math(current->thread.regs);
/*
- * The copy-paste buffer can only store into foreign real
- * addresses, so unprivileged processes can not see the
- * data or use it in any way unless they have foreign real
- * mappings. If the new process has the foreign real address
- * mappings, we must issue a cp_abort to clear any state and
- * prevent snooping, corruption or a covert channel.
+ * On POWER9 the copy-paste buffer can only paste into
+ * foreign real addresses, so unprivileged processes can not
+ * see the data or use it in any way unless they have
+ * foreign real mappings. If the new process has the foreign
+ * real address mappings, we must issue a cp_abort to clear
+ * any state and prevent snooping, corruption or a covert
+ * channel. ISA v3.1 supports paste into local memory.
*/
if (current->mm &&
- atomic_read(&current->mm->context.vas_windows))
+ (cpu_has_feature(CPU_FTR_ARCH_31) ||
+ atomic_read(&current->mm->context.vas_windows)))
asm volatile(PPC_CP_ABORT);
}
#endif /* CONFIG_PPC_BOOK3S_64 */
@@ -1453,12 +1482,13 @@ void show_regs(struct pt_regs * regs)
trap = TRAP(regs);
if (!trap_is_syscall(regs) && cpu_has_feature(CPU_FTR_CFAR))
pr_cont("CFAR: "REG" ", regs->orig_gpr3);
- if (trap == 0x200 || trap == 0x300 || trap == 0x600)
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
- pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr);
-#else
- pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
-#endif
+ if (trap == 0x200 || trap == 0x300 || trap == 0x600) {
+ if (IS_ENABLED(CONFIG_4xx) || IS_ENABLED(CONFIG_BOOKE))
+ pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr);
+ else
+ pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
+ }
+
#ifdef CONFIG_PPC64
pr_cont("IRQMASK: %lx ", regs->softe);
#endif
@@ -1475,14 +1505,14 @@ void show_regs(struct pt_regs * regs)
break;
}
pr_cont("\n");
-#ifdef CONFIG_KALLSYMS
/*
* Lookup NIP late so we have the best change of getting the
* above info out without failing
*/
- printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip);
- printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link);
-#endif
+ if (IS_ENABLED(CONFIG_KALLSYMS)) {
+ printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip);
+ printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link);
+ }
show_stack(current, (unsigned long *) regs->gpr[1], KERN_DEFAULT);
if (!user_mode(regs))
show_instructions(regs);
@@ -1731,11 +1761,9 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
#ifdef CONFIG_PPC64
unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */
-#ifdef CONFIG_PPC_BOOK3S_64
- if (!radix_enabled())
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled())
preload_new_slb_context(start, sp);
#endif
-#endif
/*
* If we exec out of a kernel thread then thread.regs will not be
@@ -1866,7 +1894,6 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
* fpexc_mode. fpexc_mode is also used for setting FP exception
* mode (asyn, precise, disabled) for 'Classic' FP. */
if (val & PR_FP_EXC_SW_ENABLE) {
-#ifdef CONFIG_SPE
if (cpu_has_feature(CPU_FTR_SPE)) {
/*
* When the sticky exception bits are set
@@ -1880,16 +1907,15 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
* anyway to restore the prctl settings from
* the saved environment.
*/
+#ifdef CONFIG_SPE
tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR);
tsk->thread.fpexc_mode = val &
(PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT);
+#endif
return 0;
} else {
return -EINVAL;
}
-#else
- return -EINVAL;
-#endif
}
/* on a CONFIG_SPE this does not hurt us. The bits that
@@ -1908,10 +1934,9 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
int get_fpexc_mode(struct task_struct *tsk, unsigned long adr)
{
- unsigned int val;
+ unsigned int val = 0;
- if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE)
-#ifdef CONFIG_SPE
+ if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) {
if (cpu_has_feature(CPU_FTR_SPE)) {
/*
* When the sticky exception bits are set
@@ -1925,15 +1950,15 @@ int get_fpexc_mode(struct task_struct *tsk, unsigned long adr)
* anyway to restore the prctl settings from
* the saved environment.
*/
+#ifdef CONFIG_SPE
tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR);
val = tsk->thread.fpexc_mode;
+#endif
} else
return -EINVAL;
-#else
- return -EINVAL;
-#endif
- else
+ } else {
val = __unpack_fe01(tsk->thread.fpexc_mode);
+ }
return put_user(val, (unsigned int __user *) adr);
}
@@ -2102,10 +2127,8 @@ void show_stack(struct task_struct *tsk, unsigned long *stack,
unsigned long sp, ip, lr, newsp;
int count = 0;
int firstframe = 1;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
unsigned long ret_addr;
int ftrace_idx = 0;
-#endif
if (tsk == NULL)
tsk = current;
@@ -2133,12 +2156,10 @@ void show_stack(struct task_struct *tsk, unsigned long *stack,
if (!firstframe || ip != lr) {
printk("%s["REG"] ["REG"] %pS",
loglvl, sp, ip, (void *)ip);
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ret_addr = ftrace_graph_ret_addr(current,
&ftrace_idx, ip, stack);
if (ret_addr != ip)
pr_cont(" (%pS)", (void *)ret_addr);
-#endif
if (firstframe)
pr_cont(" (unreliable)");
pr_cont("\n");
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index d8a2fb87ba0c..c1545f22c077 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -776,6 +776,11 @@ void __init early_init_devtree(void *params)
limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE);
memblock_enforce_memory_limit(limit);
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_4K_PAGES)
+ if (!early_radix_enabled())
+ memblock_cap_memory_range(0, 1UL << (H_MAX_PHYSMEM_BITS));
+#endif
+
memblock_allow_resize();
memblock_dump_all();
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index ae7ec9903191..5090a5ab54e5 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -2422,10 +2422,19 @@ static void __init prom_check_displays(void)
u32 width, height, pitch, addr;
prom_printf("Setting btext !\n");
- prom_getprop(node, "width", &width, 4);
- prom_getprop(node, "height", &height, 4);
- prom_getprop(node, "linebytes", &pitch, 4);
- prom_getprop(node, "address", &addr, 4);
+
+ if (prom_getprop(node, "width", &width, 4) == PROM_ERROR)
+ return;
+
+ if (prom_getprop(node, "height", &height, 4) == PROM_ERROR)
+ return;
+
+ if (prom_getprop(node, "linebytes", &pitch, 4) == PROM_ERROR)
+ return;
+
+ if (prom_getprop(node, "address", &addr, 4) == PROM_ERROR)
+ return;
+
prom_printf("W=%d H=%d LB=%d addr=0x%x\n",
width, height, pitch, addr);
btext_setup_display(width, height, 8, pitch, addr);
diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
index 697c7e4b5877..aa36fcad36cd 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-noadv.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
@@ -57,6 +57,8 @@ void ppc_gethwdinfo(struct ppc_debug_info *dbginfo)
} else {
dbginfo->features = 0;
}
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ dbginfo->features |= PPC_DEBUG_FEATURE_DATA_BP_ARCH_31;
}
int ptrace_get_debugreg(struct task_struct *child, unsigned long addr,
@@ -217,8 +219,9 @@ long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_inf
return -EIO;
brk.address = ALIGN_DOWN(bp_info->addr, HW_BREAKPOINT_SIZE);
- brk.type = HW_BRK_TYPE_TRANSLATE;
+ brk.type = HW_BRK_TYPE_TRANSLATE | HW_BRK_TYPE_PRIV_ALL;
brk.len = DABR_MAX_LEN;
+ brk.hw_len = DABR_MAX_LEN;
if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
brk.type |= HW_BRK_TYPE_READ;
if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
@@ -286,11 +289,13 @@ long ppc_del_hwdebug(struct task_struct *child, long data)
}
return ret;
#else /* CONFIG_HAVE_HW_BREAKPOINT */
- if (child->thread.hw_brk[data - 1].address == 0)
+ if (!(child->thread.hw_brk[data - 1].flags & HW_BRK_FLAG_DISABLED) &&
+ child->thread.hw_brk[data - 1].address == 0)
return -ENOENT;
child->thread.hw_brk[data - 1].address = 0;
child->thread.hw_brk[data - 1].type = 0;
+ child->thread.hw_brk[data - 1].flags = 0;
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
return 0;
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 806d554ce357..954f41676f69 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -992,6 +992,147 @@ struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
return NULL;
}
+#ifdef CONFIG_PPC_RTAS_FILTER
+
+/*
+ * The sys_rtas syscall, as originally designed, allows root to pass
+ * arbitrary physical addresses to RTAS calls. A number of RTAS calls
+ * can be abused to write to arbitrary memory and do other things that
+ * are potentially harmful to system integrity, and thus should only
+ * be used inside the kernel and not exposed to userspace.
+ *
+ * All known legitimate users of the sys_rtas syscall will only ever
+ * pass addresses that fall within the RMO buffer, and use a known
+ * subset of RTAS calls.
+ *
+ * Accordingly, we filter RTAS requests to check that the call is
+ * permitted, and that provided pointers fall within the RMO buffer.
+ * The rtas_filters list contains an entry for each permitted call,
+ * with the indexes of the parameters which are expected to contain
+ * addresses and sizes of buffers allocated inside the RMO buffer.
+ */
+struct rtas_filter {
+ const char *name;
+ int token;
+ /* Indexes into the args buffer, -1 if not used */
+ int buf_idx1;
+ int size_idx1;
+ int buf_idx2;
+ int size_idx2;
+
+ int fixed_size;
+};
+
+static struct rtas_filter rtas_filters[] __ro_after_init = {
+ { "ibm,activate-firmware", -1, -1, -1, -1, -1 },
+ { "ibm,configure-connector", -1, 0, -1, 1, -1, 4096 }, /* Special cased */
+ { "display-character", -1, -1, -1, -1, -1 },
+ { "ibm,display-message", -1, 0, -1, -1, -1 },
+ { "ibm,errinjct", -1, 2, -1, -1, -1, 1024 },
+ { "ibm,close-errinjct", -1, -1, -1, -1, -1 },
+ { "ibm,open-errinct", -1, -1, -1, -1, -1 },
+ { "ibm,get-config-addr-info2", -1, -1, -1, -1, -1 },
+ { "ibm,get-dynamic-sensor-state", -1, 1, -1, -1, -1 },
+ { "ibm,get-indices", -1, 2, 3, -1, -1 },
+ { "get-power-level", -1, -1, -1, -1, -1 },
+ { "get-sensor-state", -1, -1, -1, -1, -1 },
+ { "ibm,get-system-parameter", -1, 1, 2, -1, -1 },
+ { "get-time-of-day", -1, -1, -1, -1, -1 },
+ { "ibm,get-vpd", -1, 0, -1, 1, 2 },
+ { "ibm,lpar-perftools", -1, 2, 3, -1, -1 },
+ { "ibm,platform-dump", -1, 4, 5, -1, -1 },
+ { "ibm,read-slot-reset-state", -1, -1, -1, -1, -1 },
+ { "ibm,scan-log-dump", -1, 0, 1, -1, -1 },
+ { "ibm,set-dynamic-indicator", -1, 2, -1, -1, -1 },
+ { "ibm,set-eeh-option", -1, -1, -1, -1, -1 },
+ { "set-indicator", -1, -1, -1, -1, -1 },
+ { "set-power-level", -1, -1, -1, -1, -1 },
+ { "set-time-for-power-on", -1, -1, -1, -1, -1 },
+ { "ibm,set-system-parameter", -1, 1, -1, -1, -1 },
+ { "set-time-of-day", -1, -1, -1, -1, -1 },
+ { "ibm,suspend-me", -1, -1, -1, -1, -1 },
+ { "ibm,update-nodes", -1, 0, -1, -1, -1, 4096 },
+ { "ibm,update-properties", -1, 0, -1, -1, -1, 4096 },
+ { "ibm,physical-attestation", -1, 0, 1, -1, -1 },
+};
+
+static bool in_rmo_buf(u32 base, u32 end)
+{
+ return base >= rtas_rmo_buf &&
+ base < (rtas_rmo_buf + RTAS_RMOBUF_MAX) &&
+ base <= end &&
+ end >= rtas_rmo_buf &&
+ end < (rtas_rmo_buf + RTAS_RMOBUF_MAX);
+}
+
+static bool block_rtas_call(int token, int nargs,
+ struct rtas_args *args)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) {
+ struct rtas_filter *f = &rtas_filters[i];
+ u32 base, size, end;
+
+ if (token != f->token)
+ continue;
+
+ if (f->buf_idx1 != -1) {
+ base = be32_to_cpu(args->args[f->buf_idx1]);
+ if (f->size_idx1 != -1)
+ size = be32_to_cpu(args->args[f->size_idx1]);
+ else if (f->fixed_size)
+ size = f->fixed_size;
+ else
+ size = 1;
+
+ end = base + size - 1;
+ if (!in_rmo_buf(base, end))
+ goto err;
+ }
+
+ if (f->buf_idx2 != -1) {
+ base = be32_to_cpu(args->args[f->buf_idx2]);
+ if (f->size_idx2 != -1)
+ size = be32_to_cpu(args->args[f->size_idx2]);
+ else if (f->fixed_size)
+ size = f->fixed_size;
+ else
+ size = 1;
+ end = base + size - 1;
+
+ /*
+ * Special case for ibm,configure-connector where the
+ * address can be 0
+ */
+ if (!strcmp(f->name, "ibm,configure-connector") &&
+ base == 0)
+ return false;
+
+ if (!in_rmo_buf(base, end))
+ goto err;
+ }
+
+ return false;
+ }
+
+err:
+ pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n");
+ pr_err_ratelimited("sys_rtas: token=0x%x, nargs=%d (called by %s)\n",
+ token, nargs, current->comm);
+ return true;
+}
+
+#else
+
+static bool block_rtas_call(int token, int nargs,
+ struct rtas_args *args)
+{
+ return false;
+}
+
+#endif /* CONFIG_PPC_RTAS_FILTER */
+
/* We assume to be passed big endian arguments */
SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
{
@@ -1029,6 +1170,9 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
args.rets = &args.args[nargs];
memset(args.rets, 0, nret * sizeof(rtas_arg_t));
+ if (block_rtas_call(token, nargs, &args))
+ return -EINVAL;
+
/* Need to handle ibm,suspend_me call specially */
if (token == ibm_suspend_me_token) {
@@ -1090,6 +1234,9 @@ void __init rtas_initialize(void)
unsigned long rtas_region = RTAS_INSTANTIATE_MAX;
u32 base, size, entry;
int no_base, no_size, no_entry;
+#ifdef CONFIG_PPC_RTAS_FILTER
+ int i;
+#endif
/* Get RTAS dev node and fill up our "rtas" structure with infos
* about it.
@@ -1129,6 +1276,12 @@ void __init rtas_initialize(void)
#ifdef CONFIG_RTAS_ERROR_LOGGING
rtas_last_error_token = rtas_token("rtas-last-error");
#endif
+
+#ifdef CONFIG_PPC_RTAS_FILTER
+ for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) {
+ rtas_filters[i].token = rtas_token(rtas_filters[i].name);
+ }
+#endif
}
int __init early_init_dt_scan_rtas(unsigned long node,
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index c9876aab3142..e4e1a94ccf6a 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -430,30 +430,44 @@ device_initcall(stf_barrier_debugfs_init);
static void update_branch_cache_flush(void)
{
+ u32 *site;
+
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ site = &patch__call_kvm_flush_link_stack;
// This controls the branch from guest_exit_cont to kvm_flush_link_stack
if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) {
- patch_instruction_site(&patch__call_kvm_flush_link_stack,
- ppc_inst(PPC_INST_NOP));
+ patch_instruction_site(site, ppc_inst(PPC_INST_NOP));
} else {
// Could use HW flush, but that could also flush count cache
- patch_branch_site(&patch__call_kvm_flush_link_stack,
- (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
+ patch_branch_site(site, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
}
#endif
+ // Patch out the bcctr first, then nop the rest
+ site = &patch__call_flush_branch_caches3;
+ patch_instruction_site(site, ppc_inst(PPC_INST_NOP));
+ site = &patch__call_flush_branch_caches2;
+ patch_instruction_site(site, ppc_inst(PPC_INST_NOP));
+ site = &patch__call_flush_branch_caches1;
+ patch_instruction_site(site, ppc_inst(PPC_INST_NOP));
+
// This controls the branch from _switch to flush_branch_caches
if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE &&
link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) {
- patch_instruction_site(&patch__call_flush_branch_caches,
- ppc_inst(PPC_INST_NOP));
+ // Nothing to be done
+
} else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW &&
link_stack_flush_type == BRANCH_CACHE_FLUSH_HW) {
- patch_instruction_site(&patch__call_flush_branch_caches,
- ppc_inst(PPC_INST_BCCTR_FLUSH));
+ // Patch in the bcctr last
+ site = &patch__call_flush_branch_caches1;
+ patch_instruction_site(site, ppc_inst(0x39207fff)); // li r9,0x7fff
+ site = &patch__call_flush_branch_caches2;
+ patch_instruction_site(site, ppc_inst(0x7d2903a6)); // mtctr r9
+ site = &patch__call_flush_branch_caches3;
+ patch_instruction_site(site, ppc_inst(PPC_INST_BCCTR_FLUSH));
+
} else {
- patch_branch_site(&patch__call_flush_branch_caches,
- (u64)&flush_branch_caches, BRANCH_SET_LINK);
+ patch_branch_site(site, (u64)&flush_branch_caches, BRANCH_SET_LINK);
// If we just need to flush the link stack, early return
if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE) {
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 1823706ae076..057d6b8e9bb0 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -223,6 +223,6 @@ __init void initialize_cache_info(void)
dcache_bsize = cur_cpu_spec->dcache_bsize;
icache_bsize = cur_cpu_spec->icache_bsize;
ucache_bsize = 0;
- if (IS_ENABLED(CONFIG_PPC_BOOK3S_601) || IS_ENABLED(CONFIG_E200))
+ if (IS_ENABLED(CONFIG_E200))
ucache_bsize = icache_bsize = dcache_bsize;
}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 6be430107c6f..bb9cab3641d7 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -66,6 +66,7 @@
#include <asm/feature-fixups.h>
#include <asm/kup.h>
#include <asm/early_ioremap.h>
+#include <asm/pgalloc.h>
#include "setup.h"
@@ -756,17 +757,46 @@ void __init emergency_stack_init(void)
}
#ifdef CONFIG_SMP
-#define PCPU_DYN_SIZE ()
-
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
+/**
+ * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
+ * @cpu: cpu to allocate for
+ * @size: size allocation in bytes
+ * @align: alignment
+ *
+ * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper
+ * does the right thing for NUMA regardless of the current
+ * configuration.
+ *
+ * RETURNS:
+ * Pointer to the allocated area on success, NULL on failure.
+ */
+static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
+ size_t align)
{
- return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
- MEMBLOCK_ALLOC_ACCESSIBLE,
- early_cpu_to_node(cpu));
+ const unsigned long goal = __pa(MAX_DMA_ADDRESS);
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+ int node = early_cpu_to_node(cpu);
+ void *ptr;
+ if (!node_online(node) || !NODE_DATA(node)) {
+ ptr = memblock_alloc_from(size, align, goal);
+ pr_info("cpu %d has no node %d or node-local memory\n",
+ cpu, node);
+ pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
+ cpu, size, __pa(ptr));
+ } else {
+ ptr = memblock_alloc_try_nid(size, align, goal,
+ MEMBLOCK_ALLOC_ACCESSIBLE, node);
+ pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
+ "%016lx\n", cpu, size, node, __pa(ptr));
+ }
+ return ptr;
+#else
+ return memblock_alloc_from(size, align, goal);
+#endif
}
-static void __init pcpu_fc_free(void *ptr, size_t size)
+static void __init pcpu_free_bootmem(void *ptr, size_t size)
{
memblock_free(__pa(ptr), size);
}
@@ -782,13 +812,58 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to)
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
+static void __init pcpu_populate_pte(unsigned long addr)
+{
+ pgd_t *pgd = pgd_offset_k(addr);
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(*p4d)) {
+ pud_t *new;
+
+ new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
+ if (!new)
+ goto err_alloc;
+ p4d_populate(&init_mm, p4d, new);
+ }
+
+ pud = pud_offset(p4d, addr);
+ if (pud_none(*pud)) {
+ pmd_t *new;
+
+ new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
+ if (!new)
+ goto err_alloc;
+ pud_populate(&init_mm, pud, new);
+ }
+
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_present(*pmd)) {
+ pte_t *new;
+
+ new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
+ if (!new)
+ goto err_alloc;
+ pmd_populate_kernel(&init_mm, pmd, new);
+ }
+
+ return;
+
+err_alloc:
+ panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
+ __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+}
+
+
void __init setup_per_cpu_areas(void)
{
const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
size_t atom_size;
unsigned long delta;
unsigned int cpu;
- int rc;
+ int rc = -EINVAL;
/*
* Linear mapping is one of 4K, 1M and 16M. For 4K, no need
@@ -800,8 +875,18 @@ void __init setup_per_cpu_areas(void)
else
atom_size = 1 << 20;
- rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
- pcpu_fc_alloc, pcpu_fc_free);
+ if (pcpu_chosen_fc != PCPU_FC_PAGE) {
+ rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
+ pcpu_alloc_bootmem, pcpu_free_bootmem);
+ if (rc)
+ pr_warn("PERCPU: %s allocator failed (%d), "
+ "falling back to page size\n",
+ pcpu_fc_names[pcpu_chosen_fc], rc);
+ }
+
+ if (rc < 0)
+ rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, pcpu_free_bootmem,
+ pcpu_populate_pte);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 8261999c7d52..0dc1b8591cc8 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -75,17 +75,28 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 };
struct task_struct *secondary_current;
bool has_big_cores;
+bool coregroup_enabled;
DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
EXPORT_SYMBOL_GPL(has_big_cores);
+enum {
+#ifdef CONFIG_SCHED_SMT
+ smt_idx,
+#endif
+ cache_idx,
+ mc_idx,
+ die_idx,
+};
+
#define MAX_THREAD_LIST_SIZE 8
#define THREAD_GROUP_SHARE_L1 1
struct thread_groups {
@@ -660,6 +671,28 @@ static void set_cpus_unrelated(int i, int j,
#endif
/*
+ * Extends set_cpus_related. Instead of setting one CPU at a time in
+ * dstmask, set srcmask at oneshot. dstmask should be super set of srcmask.
+ */
+static void or_cpumasks_related(int i, int j, struct cpumask *(*srcmask)(int),
+ struct cpumask *(*dstmask)(int))
+{
+ struct cpumask *mask;
+ int k;
+
+ mask = srcmask(j);
+ for_each_cpu(k, srcmask(i))
+ cpumask_or(dstmask(k), dstmask(k), mask);
+
+ if (i == j)
+ return;
+
+ mask = srcmask(i);
+ for_each_cpu(k, srcmask(j))
+ cpumask_or(dstmask(k), dstmask(k), mask);
+}
+
+/*
* parse_thread_groups: Parses the "ibm,thread-groups" device tree
* property for the CPU device node @dn and stores
* the parsed output in the thread_groups
@@ -789,10 +822,6 @@ static int init_cpu_l1_cache_map(int cpu)
if (err)
goto out;
- zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu),
- GFP_KERNEL,
- cpu_to_node(cpu));
-
cpu_group_start = get_cpu_thread_group_start(cpu, &tg);
if (unlikely(cpu_group_start == -1)) {
@@ -801,6 +830,9 @@ static int init_cpu_l1_cache_map(int cpu)
goto out;
}
+ zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu),
+ GFP_KERNEL, cpu_to_node(cpu));
+
for (i = first_thread; i < first_thread + threads_per_core; i++) {
int i_group_start = get_cpu_thread_group_start(i, &tg);
@@ -819,6 +851,74 @@ out:
return err;
}
+static bool shared_caches;
+
+#ifdef CONFIG_SCHED_SMT
+/* cpumask of CPUs with asymmetric SMT dependency */
+static int powerpc_smt_flags(void)
+{
+ int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+
+ if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
+ printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
+ flags |= SD_ASYM_PACKING;
+ }
+ return flags;
+}
+#endif
+
+/*
+ * P9 has a slightly odd architecture where pairs of cores share an L2 cache.
+ * This topology makes it *much* cheaper to migrate tasks between adjacent cores
+ * since the migrated task remains cache hot. We want to take advantage of this
+ * at the scheduler level so an extra topology level is required.
+ */
+static int powerpc_shared_cache_flags(void)
+{
+ return SD_SHARE_PKG_RESOURCES;
+}
+
+/*
+ * We can't just pass cpu_l2_cache_mask() directly because
+ * returns a non-const pointer and the compiler barfs on that.
+ */
+static const struct cpumask *shared_cache_mask(int cpu)
+{
+ return per_cpu(cpu_l2_cache_map, cpu);
+}
+
+#ifdef CONFIG_SCHED_SMT
+static const struct cpumask *smallcore_smt_mask(int cpu)
+{
+ return cpu_smallcore_mask(cpu);
+}
+#endif
+
+static struct cpumask *cpu_coregroup_mask(int cpu)
+{
+ return per_cpu(cpu_coregroup_map, cpu);
+}
+
+static bool has_coregroup_support(void)
+{
+ return coregroup_enabled;
+}
+
+static const struct cpumask *cpu_mc_mask(int cpu)
+{
+ return cpu_coregroup_mask(cpu);
+}
+
+static struct sched_domain_topology_level powerpc_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+ { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+ { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
+ { cpu_mc_mask, SD_INIT_NAME(MC) },
+ { cpu_cpu_mask, SD_INIT_NAME(DIE) },
+ { NULL, },
+};
+
static int init_big_cores(void)
{
int cpu;
@@ -861,6 +961,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
GFP_KERNEL, cpu_to_node(cpu));
zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
GFP_KERNEL, cpu_to_node(cpu));
+ if (has_coregroup_support())
+ zalloc_cpumask_var_node(&per_cpu(cpu_coregroup_map, cpu),
+ GFP_KERNEL, cpu_to_node(cpu));
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
/*
* numa_node_id() works after this.
*/
@@ -869,12 +974,21 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
set_cpu_numa_mem(cpu,
local_memory_node(numa_cpu_lookup_table[cpu]));
}
+#endif
+ /*
+ * cpu_core_map is now more updated and exists only since
+ * its been exported for long. It only will have a snapshot
+ * of cpu_cpu_mask.
+ */
+ cpumask_copy(per_cpu(cpu_core_map, cpu), cpu_cpu_mask(cpu));
}
/* Init the cpumasks so the boot CPU is related to itself */
cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
- cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
+
+ if (has_coregroup_support())
+ cpumask_set_cpu(boot_cpuid, cpu_coregroup_mask(boot_cpuid));
init_big_cores();
if (has_big_cores) {
@@ -1126,30 +1240,61 @@ static struct device_node *cpu_to_l2cache(int cpu)
return cache;
}
-static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int))
+static bool update_mask_by_l2(int cpu)
{
+ struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
struct device_node *l2_cache, *np;
+ cpumask_var_t mask;
int i;
l2_cache = cpu_to_l2cache(cpu);
- if (!l2_cache)
+ if (!l2_cache) {
+ struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
+
+ /*
+ * If no l2cache for this CPU, assume all siblings to share
+ * cache with this CPU.
+ */
+ if (has_big_cores)
+ sibling_mask = cpu_smallcore_mask;
+
+ for_each_cpu(i, sibling_mask(cpu))
+ set_cpus_related(cpu, i, cpu_l2_cache_mask);
+
return false;
+ }
+
+ alloc_cpumask_var_node(&mask, GFP_KERNEL, cpu_to_node(cpu));
+ cpumask_and(mask, cpu_online_mask, cpu_cpu_mask(cpu));
- for_each_cpu(i, cpu_online_mask) {
+ if (has_big_cores)
+ submask_fn = cpu_smallcore_mask;
+
+ /* Update l2-cache mask with all the CPUs that are part of submask */
+ or_cpumasks_related(cpu, cpu, submask_fn, cpu_l2_cache_mask);
+
+ /* Skip all CPUs already part of current CPU l2-cache mask */
+ cpumask_andnot(mask, mask, cpu_l2_cache_mask(cpu));
+
+ for_each_cpu(i, mask) {
/*
* when updating the marks the current CPU has not been marked
* online, but we need to update the cache masks
*/
np = cpu_to_l2cache(i);
- if (!np)
- continue;
- if (np == l2_cache)
- set_cpus_related(cpu, i, mask_fn);
+ /* Skip all CPUs already part of current CPU l2-cache */
+ if (np == l2_cache) {
+ or_cpumasks_related(cpu, i, submask_fn, cpu_l2_cache_mask);
+ cpumask_andnot(mask, mask, submask_fn(i));
+ } else {
+ cpumask_andnot(mask, mask, cpu_l2_cache_mask(i));
+ }
of_node_put(np);
}
of_node_put(l2_cache);
+ free_cpumask_var(mask);
return true;
}
@@ -1157,59 +1302,75 @@ static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int))
#ifdef CONFIG_HOTPLUG_CPU
static void remove_cpu_from_masks(int cpu)
{
+ struct cpumask *(*mask_fn)(int) = cpu_sibling_mask;
int i;
- /* NB: cpu_core_mask is a superset of the others */
- for_each_cpu(i, cpu_core_mask(cpu)) {
- set_cpus_unrelated(cpu, i, cpu_core_mask);
+ if (shared_caches)
+ mask_fn = cpu_l2_cache_mask;
+
+ for_each_cpu(i, mask_fn(cpu)) {
set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
set_cpus_unrelated(cpu, i, cpu_sibling_mask);
if (has_big_cores)
set_cpus_unrelated(cpu, i, cpu_smallcore_mask);
}
+
+ if (has_coregroup_support()) {
+ for_each_cpu(i, cpu_coregroup_mask(cpu))
+ set_cpus_unrelated(cpu, i, cpu_coregroup_mask);
+ }
}
#endif
static inline void add_cpu_to_smallcore_masks(int cpu)
{
- struct cpumask *this_l1_cache_map = per_cpu(cpu_l1_cache_map, cpu);
- int i, first_thread = cpu_first_thread_sibling(cpu);
+ int i;
if (!has_big_cores)
return;
cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu));
- for (i = first_thread; i < first_thread + threads_per_core; i++) {
- if (cpu_online(i) && cpumask_test_cpu(i, this_l1_cache_map))
+ for_each_cpu(i, per_cpu(cpu_l1_cache_map, cpu)) {
+ if (cpu_online(i))
set_cpus_related(i, cpu, cpu_smallcore_mask);
}
}
-int get_physical_package_id(int cpu)
+static void update_coregroup_mask(int cpu)
{
- int pkg_id = cpu_to_chip_id(cpu);
+ struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
+ cpumask_var_t mask;
+ int coregroup_id = cpu_to_coregroup_id(cpu);
+ int i;
- /*
- * If the platform is PowerNV or Guest on KVM, ibm,chip-id is
- * defined. Hence we would return the chip-id as the result of
- * get_physical_package_id.
- */
- if (pkg_id == -1 && firmware_has_feature(FW_FEATURE_LPAR) &&
- IS_ENABLED(CONFIG_PPC_SPLPAR)) {
- struct device_node *np = of_get_cpu_node(cpu, NULL);
- pkg_id = of_node_to_nid(np);
- of_node_put(np);
- }
+ alloc_cpumask_var_node(&mask, GFP_KERNEL, cpu_to_node(cpu));
+ cpumask_and(mask, cpu_online_mask, cpu_cpu_mask(cpu));
+
+ if (shared_caches)
+ submask_fn = cpu_l2_cache_mask;
- return pkg_id;
+ /* Update coregroup mask with all the CPUs that are part of submask */
+ or_cpumasks_related(cpu, cpu, submask_fn, cpu_coregroup_mask);
+
+ /* Skip all CPUs already part of coregroup mask */
+ cpumask_andnot(mask, mask, cpu_coregroup_mask(cpu));
+
+ for_each_cpu(i, mask) {
+ /* Skip all CPUs not part of this coregroup */
+ if (coregroup_id == cpu_to_coregroup_id(i)) {
+ or_cpumasks_related(cpu, i, submask_fn, cpu_coregroup_mask);
+ cpumask_andnot(mask, mask, submask_fn(i));
+ } else {
+ cpumask_andnot(mask, mask, cpu_coregroup_mask(i));
+ }
+ }
+ free_cpumask_var(mask);
}
-EXPORT_SYMBOL_GPL(get_physical_package_id);
static void add_cpu_to_masks(int cpu)
{
int first_thread = cpu_first_thread_sibling(cpu);
- int pkg_id = get_physical_package_id(cpu);
int i;
/*
@@ -1223,36 +1384,16 @@ static void add_cpu_to_masks(int cpu)
set_cpus_related(i, cpu, cpu_sibling_mask);
add_cpu_to_smallcore_masks(cpu);
- /*
- * Copy the thread sibling mask into the cache sibling mask
- * and mark any CPUs that share an L2 with this CPU.
- */
- for_each_cpu(i, cpu_sibling_mask(cpu))
- set_cpus_related(cpu, i, cpu_l2_cache_mask);
- update_mask_by_l2(cpu, cpu_l2_cache_mask);
+ update_mask_by_l2(cpu);
- /*
- * Copy the cache sibling mask into core sibling mask and mark
- * any CPUs on the same chip as this CPU.
- */
- for_each_cpu(i, cpu_l2_cache_mask(cpu))
- set_cpus_related(cpu, i, cpu_core_mask);
-
- if (pkg_id == -1)
- return;
-
- for_each_cpu(i, cpu_online_mask)
- if (get_physical_package_id(i) == pkg_id)
- set_cpus_related(cpu, i, cpu_core_mask);
+ if (has_coregroup_support())
+ update_coregroup_mask(cpu);
}
-static bool shared_caches;
-
/* Activate a secondary processor. */
void start_secondary(void *unused)
{
unsigned int cpu = smp_processor_id();
- struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
mmgrab(&init_mm);
current->active_mm = &init_mm;
@@ -1278,14 +1419,20 @@ void start_secondary(void *unused)
/* Update topology CPU masks */
add_cpu_to_masks(cpu);
- if (has_big_cores)
- sibling_mask = cpu_smallcore_mask;
/*
* Check for any shared caches. Note that this must be done on a
* per-core basis because one core in the pair might be disabled.
*/
- if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu)))
- shared_caches = true;
+ if (!shared_caches) {
+ struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
+ struct cpumask *mask = cpu_l2_cache_mask(cpu);
+
+ if (has_big_cores)
+ sibling_mask = cpu_smallcore_mask;
+
+ if (cpumask_weight(mask) > cpumask_weight(sibling_mask(cpu)))
+ shared_caches = true;
+ }
set_numa_node(numa_cpu_lookup_table[cpu]);
set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
@@ -1311,63 +1458,44 @@ int setup_profiling_timer(unsigned int multiplier)
return 0;
}
-#ifdef CONFIG_SCHED_SMT
-/* cpumask of CPUs with asymetric SMT dependancy */
-static int powerpc_smt_flags(void)
+static void fixup_topology(void)
{
- int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+ int i;
- if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
- printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
- flags |= SD_ASYM_PACKING;
+#ifdef CONFIG_SCHED_SMT
+ if (has_big_cores) {
+ pr_info("Big cores detected but using small core scheduling\n");
+ powerpc_topology[smt_idx].mask = smallcore_smt_mask;
}
- return flags;
-}
#endif
-static struct sched_domain_topology_level powerpc_topology[] = {
-#ifdef CONFIG_SCHED_SMT
- { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
-#endif
- { cpu_cpu_mask, SD_INIT_NAME(DIE) },
- { NULL, },
-};
+ if (!has_coregroup_support())
+ powerpc_topology[mc_idx].mask = powerpc_topology[cache_idx].mask;
-/*
- * P9 has a slightly odd architecture where pairs of cores share an L2 cache.
- * This topology makes it *much* cheaper to migrate tasks between adjacent cores
- * since the migrated task remains cache hot. We want to take advantage of this
- * at the scheduler level so an extra topology level is required.
- */
-static int powerpc_shared_cache_flags(void)
-{
- return SD_SHARE_PKG_RESOURCES;
-}
+ /*
+ * Try to consolidate topology levels here instead of
+ * allowing scheduler to degenerate.
+ * - Dont consolidate if masks are different.
+ * - Dont consolidate if sd_flags exists and are different.
+ */
+ for (i = 1; i <= die_idx; i++) {
+ if (powerpc_topology[i].mask != powerpc_topology[i - 1].mask)
+ continue;
-/*
- * We can't just pass cpu_l2_cache_mask() directly because
- * returns a non-const pointer and the compiler barfs on that.
- */
-static const struct cpumask *shared_cache_mask(int cpu)
-{
- return cpu_l2_cache_mask(cpu);
-}
+ if (powerpc_topology[i].sd_flags && powerpc_topology[i - 1].sd_flags &&
+ powerpc_topology[i].sd_flags != powerpc_topology[i - 1].sd_flags)
+ continue;
-#ifdef CONFIG_SCHED_SMT
-static const struct cpumask *smallcore_smt_mask(int cpu)
-{
- return cpu_smallcore_mask(cpu);
-}
-#endif
+ if (!powerpc_topology[i - 1].sd_flags)
+ powerpc_topology[i - 1].sd_flags = powerpc_topology[i].sd_flags;
-static struct sched_domain_topology_level power9_topology[] = {
-#ifdef CONFIG_SCHED_SMT
- { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
+ powerpc_topology[i].mask = powerpc_topology[i + 1].mask;
+ powerpc_topology[i].sd_flags = powerpc_topology[i + 1].sd_flags;
+#ifdef CONFIG_SCHED_DEBUG
+ powerpc_topology[i].name = powerpc_topology[i + 1].name;
#endif
- { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
- { cpu_cpu_mask, SD_INIT_NAME(DIE) },
- { NULL, },
-};
+ }
+}
void __init smp_cpus_done(unsigned int max_cpus)
{
@@ -1382,24 +1510,8 @@ void __init smp_cpus_done(unsigned int max_cpus)
dump_numa_cpu_topology();
-#ifdef CONFIG_SCHED_SMT
- if (has_big_cores) {
- pr_info("Big cores detected but using small core scheduling\n");
- power9_topology[0].mask = smallcore_smt_mask;
- powerpc_topology[0].mask = smallcore_smt_mask;
- }
-#endif
- /*
- * If any CPU detects that it's sharing a cache with another CPU then
- * use the deeper topology that is aware of this sharing.
- */
- if (shared_caches) {
- pr_info("Using shared cache scheduler topology\n");
- set_sched_topology(power9_topology);
- } else {
- pr_info("Using standard scheduler topology\n");
- set_sched_topology(powerpc_topology);
- }
+ fixup_topology();
+ set_sched_topology(powerpc_topology);
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -1429,16 +1541,18 @@ void __cpu_die(unsigned int cpu)
smp_ops->cpu_die(cpu);
}
-void cpu_die(void)
+void arch_cpu_idle_dead(void)
{
+ sched_preempt_enable_no_resched();
+
/*
* Disable on the down path. This will be re-enabled by
* start_secondary() via start_secondary_resume() below
*/
this_cpu_disable_ftrace();
- if (ppc_md.cpu_die)
- ppc_md.cpu_die();
+ if (smp_ops->cpu_offline_self)
+ smp_ops->cpu_offline_self();
/* If we return, we re-enter start_secondary */
start_secondary_resume();
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 9d7fb4ced290..1275daec7fec 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -529,3 +529,4 @@
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
+440 common process_madvise sys_process_madvise
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 46b4ebc33db7..2e08640bb3b4 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -32,29 +32,27 @@
static DEFINE_PER_CPU(struct cpu, cpu_devices);
-/*
- * SMT snooze delay stuff, 64-bit only for now
- */
-
#ifdef CONFIG_PPC64
-/* Time in microseconds we delay before sleeping in the idle loop */
-static DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 };
+/*
+ * Snooze delay has not been hooked up since 3fa8cad82b94 ("powerpc/pseries/cpuidle:
+ * smt-snooze-delay cleanup.") and has been broken even longer. As was foretold in
+ * 2014:
+ *
+ * "ppc64_util currently utilises it. Once we fix ppc64_util, propose to clean
+ * up the kernel code."
+ *
+ * powerpc-utils stopped using it as of 1.3.8. At some point in the future this
+ * code should be removed.
+ */
static ssize_t store_smt_snooze_delay(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
- struct cpu *cpu = container_of(dev, struct cpu, dev);
- ssize_t ret;
- long snooze;
-
- ret = sscanf(buf, "%ld", &snooze);
- if (ret != 1)
- return -EINVAL;
-
- per_cpu(smt_snooze_delay, cpu->dev.id) = snooze;
+ pr_warn_once("%s (%d) stored to unsupported smt_snooze_delay, which has no effect.\n",
+ current->comm, current->pid);
return count;
}
@@ -62,9 +60,9 @@ static ssize_t show_smt_snooze_delay(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct cpu *cpu = container_of(dev, struct cpu, dev);
-
- return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id));
+ pr_warn_once("%s (%d) read from unsupported smt_snooze_delay\n",
+ current->comm, current->pid);
+ return sprintf(buf, "100\n");
}
static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
@@ -72,16 +70,10 @@ static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
static int __init setup_smt_snooze_delay(char *str)
{
- unsigned int cpu;
- long snooze;
-
if (!cpu_has_feature(CPU_FTR_SMT))
return 1;
- snooze = simple_strtol(str, NULL, 10);
- for_each_possible_cpu(cpu)
- per_cpu(smt_snooze_delay, cpu) = snooze;
-
+ pr_warn("smt-snooze-delay command line option has no effect\n");
return 1;
}
__setup("smt-snooze-delay=", setup_smt_snooze_delay);
@@ -225,14 +217,13 @@ static DEVICE_ATTR(dscr_default, 0600,
static void sysfs_create_dscr_default(void)
{
if (cpu_has_feature(CPU_FTR_DSCR)) {
- int err = 0;
int cpu;
dscr_default = spr_default_dscr;
for_each_possible_cpu(cpu)
paca_ptrs[cpu]->dscr_default = dscr_default;
- err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default);
+ device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default);
}
}
#endif /* CONFIG_PPC64 */
@@ -1168,6 +1159,7 @@ static int __init topology_init(void)
for_each_possible_cpu(cpu) {
struct cpu *c = &per_cpu(cpu_devices, cpu);
+#ifdef CONFIG_HOTPLUG_CPU
/*
* For now, we just see if the system supports making
* the RTAS calls for CPU hotplug. But, there may be a
@@ -1175,8 +1167,9 @@ static int __init topology_init(void)
* CPU. For instance, the boot cpu might never be valid
* for hotplugging.
*/
- if (ppc_md.cpu_die)
+ if (smp_ops->cpu_offline_self)
c->hotpluggable = 1;
+#endif
if (cpu_online(cpu) || c->hotpluggable) {
register_cpu(c, cpu);
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index e2ab8a111b69..0b4694b8d248 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -13,13 +13,14 @@
*/
#include <linux/errno.h>
-#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/workqueue.h>
#include <asm/io.h>
#include <asm/reg.h>
@@ -39,9 +40,7 @@ static struct tau_temp
unsigned char grew;
} tau[NR_CPUS];
-struct timer_list tau_timer;
-
-#undef DEBUG
+static bool tau_int_enable;
/* TODO: put these in a /proc interface, with some sanity checks, and maybe
* dynamic adjustment to minimize # of interrupts */
@@ -50,72 +49,49 @@ struct timer_list tau_timer;
#define step_size 2 /* step size when temp goes out of range */
#define window_expand 1 /* expand the window by this much */
/* configurable values for shrinking the window */
-#define shrink_timer 2*HZ /* period between shrinking the window */
+#define shrink_timer 2000 /* period between shrinking the window */
#define min_window 2 /* minimum window size, degrees C */
static void set_thresholds(unsigned long cpu)
{
-#ifdef CONFIG_TAU_INT
- /*
- * setup THRM1,
- * threshold, valid bit, enable interrupts, interrupt when below threshold
- */
- mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TIE | THRM1_TID);
+ u32 maybe_tie = tau_int_enable ? THRM1_TIE : 0;
- /* setup THRM2,
- * threshold, valid bit, enable interrupts, interrupt when above threshold
- */
- mtspr (SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | THRM1_TIE);
-#else
- /* same thing but don't enable interrupts */
- mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TID);
- mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V);
-#endif
+ /* setup THRM1, threshold, valid bit, interrupt when below threshold */
+ mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | maybe_tie | THRM1_TID);
+
+ /* setup THRM2, threshold, valid bit, interrupt when above threshold */
+ mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | maybe_tie);
}
static void TAUupdate(int cpu)
{
- unsigned thrm;
-
-#ifdef DEBUG
- printk("TAUupdate ");
-#endif
+ u32 thrm;
+ u32 bits = THRM1_TIV | THRM1_TIN | THRM1_V;
/* if both thresholds are crossed, the step_sizes cancel out
* and the window winds up getting expanded twice. */
- if((thrm = mfspr(SPRN_THRM1)) & THRM1_TIV){ /* is valid? */
- if(thrm & THRM1_TIN){ /* crossed low threshold */
- if (tau[cpu].low >= step_size){
- tau[cpu].low -= step_size;
- tau[cpu].high -= (step_size - window_expand);
- }
- tau[cpu].grew = 1;
-#ifdef DEBUG
- printk("low threshold crossed ");
-#endif
+ thrm = mfspr(SPRN_THRM1);
+ if ((thrm & bits) == bits) {
+ mtspr(SPRN_THRM1, 0);
+
+ if (tau[cpu].low >= step_size) {
+ tau[cpu].low -= step_size;
+ tau[cpu].high -= (step_size - window_expand);
}
+ tau[cpu].grew = 1;
+ pr_debug("%s: low threshold crossed\n", __func__);
}
- if((thrm = mfspr(SPRN_THRM2)) & THRM1_TIV){ /* is valid? */
- if(thrm & THRM1_TIN){ /* crossed high threshold */
- if (tau[cpu].high <= 127-step_size){
- tau[cpu].low += (step_size - window_expand);
- tau[cpu].high += step_size;
- }
- tau[cpu].grew = 1;
-#ifdef DEBUG
- printk("high threshold crossed ");
-#endif
+ thrm = mfspr(SPRN_THRM2);
+ if ((thrm & bits) == bits) {
+ mtspr(SPRN_THRM2, 0);
+
+ if (tau[cpu].high <= 127 - step_size) {
+ tau[cpu].low += (step_size - window_expand);
+ tau[cpu].high += step_size;
}
+ tau[cpu].grew = 1;
+ pr_debug("%s: high threshold crossed\n", __func__);
}
-
-#ifdef DEBUG
- printk("grew = %d\n", tau[cpu].grew);
-#endif
-
-#ifndef CONFIG_TAU_INT /* tau_timeout will do this if not using interrupts */
- set_thresholds(cpu);
-#endif
-
}
#ifdef CONFIG_TAU_INT
@@ -140,17 +116,16 @@ void TAUException(struct pt_regs * regs)
static void tau_timeout(void * info)
{
int cpu;
- unsigned long flags;
int size;
int shrink;
- /* disabling interrupts *should* be okay */
- local_irq_save(flags);
cpu = smp_processor_id();
-#ifndef CONFIG_TAU_INT
- TAUupdate(cpu);
-#endif
+ if (!tau_int_enable)
+ TAUupdate(cpu);
+
+ /* Stop thermal sensor comparisons and interrupts */
+ mtspr(SPRN_THRM3, 0);
size = tau[cpu].high - tau[cpu].low;
if (size > min_window && ! tau[cpu].grew) {
@@ -173,32 +148,26 @@ static void tau_timeout(void * info)
set_thresholds(cpu);
- /*
- * Do the enable every time, since otherwise a bunch of (relatively)
- * complex sleep code needs to be added. One mtspr every time
- * tau_timeout is called is probably not a big deal.
- *
- * Enable thermal sensor and set up sample interval timer
- * need 20 us to do the compare.. until a nice 'cpu_speed' function
- * call is implemented, just assume a 500 mhz clock. It doesn't really
- * matter if we take too long for a compare since it's all interrupt
- * driven anyway.
- *
- * use a extra long time.. (60 us @ 500 mhz)
+ /* Restart thermal sensor comparisons and interrupts.
+ * The "PowerPC 740 and PowerPC 750 Microprocessor Datasheet"
+ * recommends that "the maximum value be set in THRM3 under all
+ * conditions."
*/
- mtspr(SPRN_THRM3, THRM3_SITV(500*60) | THRM3_E);
-
- local_irq_restore(flags);
+ mtspr(SPRN_THRM3, THRM3_SITV(0x1fff) | THRM3_E);
}
-static void tau_timeout_smp(struct timer_list *unused)
-{
+static struct workqueue_struct *tau_workq;
- /* schedule ourselves to be run again */
- mod_timer(&tau_timer, jiffies + shrink_timer) ;
+static void tau_work_func(struct work_struct *work)
+{
+ msleep(shrink_timer);
on_each_cpu(tau_timeout, NULL, 0);
+ /* schedule ourselves to be run again */
+ queue_work(tau_workq, work);
}
+DECLARE_WORK(tau_work, tau_work_func);
+
/*
* setup the TAU
*
@@ -231,21 +200,19 @@ static int __init TAU_init(void)
return 1;
}
+ tau_int_enable = IS_ENABLED(CONFIG_TAU_INT) &&
+ !strcmp(cur_cpu_spec->platform, "ppc750");
- /* first, set up the window shrinking timer */
- timer_setup(&tau_timer, tau_timeout_smp, 0);
- tau_timer.expires = jiffies + shrink_timer;
- add_timer(&tau_timer);
+ tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1, 0);
+ if (!tau_workq)
+ return -ENOMEM;
on_each_cpu(TAU_init_smp, NULL, 0);
- printk("Thermal assist unit ");
-#ifdef CONFIG_TAU_INT
- printk("using interrupts, ");
-#else
- printk("using timers, ");
-#endif
- printk("shrink_timer: %d jiffies\n", shrink_timer);
+ queue_work(tau_workq, &tau_work);
+
+ pr_info("Thermal assist unit using %s, shrink_timer: %d ms\n",
+ tau_int_enable ? "interrupts" : "workqueue", shrink_timer);
tau_initialized = 1;
return 0;
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index f85539ebb513..74efe46f5532 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -75,15 +75,6 @@
#include <linux/clockchips.h>
#include <linux/timekeeper_internal.h>
-static u64 rtc_read(struct clocksource *);
-static struct clocksource clocksource_rtc = {
- .name = "rtc",
- .rating = 400,
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
- .mask = CLOCKSOURCE_MASK(64),
- .read = rtc_read,
-};
-
static u64 timebase_read(struct clocksource *);
static struct clocksource clocksource_timebase = {
.name = "timebase",
@@ -447,19 +438,9 @@ void vtime_flush(struct task_struct *tsk)
void __delay(unsigned long loops)
{
unsigned long start;
- int diff;
spin_begin();
- if (__USE_RTC()) {
- start = get_rtcl();
- do {
- /* the RTCL register wraps at 1000000000 */
- diff = get_rtcl() - start;
- if (diff < 0)
- diff += 1000000000;
- spin_cpu_relax();
- } while (diff < loops);
- } else if (tb_invalid) {
+ if (tb_invalid) {
/*
* TB is in error state and isn't ticking anymore.
* HMI handler was unable to recover from TB error.
@@ -467,8 +448,8 @@ void __delay(unsigned long loops)
*/
spin_cpu_relax();
} else {
- start = get_tbl();
- while (get_tbl() - start < loops)
+ start = mftb();
+ while (mftb() - start < loops)
spin_cpu_relax();
}
spin_end();
@@ -614,7 +595,7 @@ void timer_interrupt(struct pt_regs *regs)
irq_work_run();
}
- now = get_tb_or_rtc();
+ now = get_tb();
if (now >= *next_tb) {
*next_tb = ~(u64)0;
if (evt->event_handler)
@@ -696,8 +677,6 @@ EXPORT_SYMBOL_GPL(tb_to_ns);
*/
notrace unsigned long long sched_clock(void)
{
- if (__USE_RTC())
- return get_rtc();
return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
}
@@ -847,11 +826,6 @@ void read_persistent_clock64(struct timespec64 *ts)
}
/* clocksource code */
-static notrace u64 rtc_read(struct clocksource *cs)
-{
- return (u64)get_rtc();
-}
-
static notrace u64 timebase_read(struct clocksource *cs)
{
return (u64)get_tb();
@@ -948,12 +922,7 @@ void update_vsyscall_tz(void)
static void __init clocksource_init(void)
{
- struct clocksource *clock;
-
- if (__USE_RTC())
- clock = &clocksource_rtc;
- else
- clock = &clocksource_timebase;
+ struct clocksource *clock = &clocksource_timebase;
if (clocksource_register_hz(clock, tb_ticks_per_sec)) {
printk(KERN_ERR "clocksource: %s is already registered\n",
@@ -968,7 +937,7 @@ static void __init clocksource_init(void)
static int decrementer_set_next_event(unsigned long evt,
struct clock_event_device *dev)
{
- __this_cpu_write(decrementers_next_tb, get_tb_or_rtc() + evt);
+ __this_cpu_write(decrementers_next_tb, get_tb() + evt);
set_dec(evt);
/* We may have raced with new irq work */
@@ -1071,17 +1040,12 @@ void __init time_init(void)
u64 scale;
unsigned shift;
- if (__USE_RTC()) {
- /* 601 processor: dec counts down by 128 every 128ns */
- ppc_tb_freq = 1000000000;
- } else {
- /* Normal PowerPC with timebase register */
- ppc_md.calibrate_decr();
- printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n",
- ppc_tb_freq / 1000000, ppc_tb_freq % 1000000);
- printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n",
- ppc_proc_freq / 1000000, ppc_proc_freq % 1000000);
- }
+ /* Normal PowerPC with timebase register */
+ ppc_md.calibrate_decr();
+ printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n",
+ ppc_tb_freq / 1000000, ppc_tb_freq % 1000000);
+ printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n",
+ ppc_proc_freq / 1000000, ppc_proc_freq % 1000000);
tb_ticks_per_jiffy = ppc_tb_freq / HZ;
tb_ticks_per_sec = ppc_tb_freq;
@@ -1107,7 +1071,7 @@ void __init time_init(void)
tb_to_ns_scale = scale;
tb_to_ns_shift = shift;
/* Save the current timebase to pretty up CONFIG_PRINTK_TIME */
- boot_tb = get_tb_or_rtc();
+ boot_tb = get_tb();
/* If platform provided a timezone (pmac), we correct the time */
if (timezone_offset) {
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 6ba0fdd1e7f8..2b91f233b05d 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -122,6 +122,13 @@ _GLOBAL(tm_reclaim)
std r3, STK_PARAM(R3)(r1)
SAVE_NVGPRS(r1)
+ /*
+ * Save kernel live AMR since it will be clobbered by treclaim
+ * but can be used elsewhere later in kernel space.
+ */
+ mfspr r3, SPRN_AMR
+ std r3, TM_FRAME_L1(r1)
+
/* We need to setup MSR for VSX register save instructions. */
mfmsr r14
mr r15, r14
@@ -245,7 +252,7 @@ _GLOBAL(tm_reclaim)
* but is used in signal return to 'wind back' to the abort handler.
*/
- /* ******************** CR,LR,CCR,MSR ********** */
+ /* ***************** CTR, LR, CR, XER ********** */
mfctr r3
mflr r4
mfcr r5
@@ -256,7 +263,6 @@ _GLOBAL(tm_reclaim)
std r5, _CCR(r7)
std r6, _XER(r7)
-
/* ******************** TAR, DSCR ********** */
mfspr r3, SPRN_TAR
mfspr r4, SPRN_DSCR
@@ -264,6 +270,10 @@ _GLOBAL(tm_reclaim)
std r3, THREAD_TM_TAR(r12)
std r4, THREAD_TM_DSCR(r12)
+ /* ******************** AMR **************** */
+ mfspr r3, SPRN_AMR
+ std r3, THREAD_TM_AMR(r12)
+
/*
* MSR and flags: We don't change CRs, and we don't need to alter MSR.
*/
@@ -308,7 +318,9 @@ _GLOBAL(tm_reclaim)
std r3, THREAD_TM_TFHAR(r12)
std r4, THREAD_TM_TFIAR(r12)
- /* AMR is checkpointed too, but is unsupported by Linux. */
+ /* Restore kernel live AMR */
+ ld r8, TM_FRAME_L1(r1)
+ mtspr SPRN_AMR, r8
/* Restore original MSR/IRQ state & clear TM mode */
ld r14, TM_FRAME_L0(r1) /* Orig MSR */
@@ -355,6 +367,13 @@ _GLOBAL(__tm_recheckpoint)
*/
SAVE_NVGPRS(r1)
+ /*
+ * Save kernel live AMR since it will be clobbered for trechkpt
+ * but can be used elsewhere later in kernel space.
+ */
+ mfspr r8, SPRN_AMR
+ std r8, TM_FRAME_L0(r1)
+
/* Load complete register state from ts_ckpt* registers */
addi r7, r3, PT_CKPT_REGS /* Thread's ckpt_regs */
@@ -404,7 +423,7 @@ _GLOBAL(__tm_recheckpoint)
restore_gprs:
- /* ******************** CR,LR,CCR,MSR ********** */
+ /* ****************** CTR, LR, XER ************* */
ld r4, _CTR(r7)
ld r5, _LINK(r7)
ld r8, _XER(r7)
@@ -417,6 +436,10 @@ restore_gprs:
ld r4, THREAD_TM_TAR(r3)
mtspr SPRN_TAR, r4
+ /* ******************** AMR ******************** */
+ ld r4, THREAD_TM_AMR(r3)
+ mtspr SPRN_AMR, r4
+
/* Load up the PPR and DSCR in GPRs only at this stage */
ld r5, THREAD_TM_DSCR(r3)
ld r6, THREAD_TM_PPR(r3)
@@ -509,6 +532,10 @@ restore_gprs:
li r4, MSR_RI
mtmsrd r4, 1
+ /* Restore kernel live AMR */
+ ld r8, TM_FRAME_L0(r1)
+ mtspr SPRN_AMR, r8
+
REST_NVGPRS(r1)
addi r1, r1, TM_FRAME_SIZE
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index d1ebe152f210..c5f39f13e96e 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -529,9 +529,6 @@ out:
* Check if the NIP corresponds to the address of a sync
* instruction for which there is an entry in the exception
* table.
- * Note that the 601 only takes a machine check on TEA
- * (transfer error ack) signal assertion, and does not
- * set any of the top 16 bits of SRR1.
* -- paulus.
*/
static inline int check_io_access(struct pt_regs *regs)
@@ -796,7 +793,6 @@ int machine_check_generic(struct pt_regs *regs)
case 0x80000:
pr_cont("Machine check signal\n");
break;
- case 0: /* for 601 */
case 0x40000:
case 0x140000: /* 7450 MSS error and TEA */
pr_cont("Transfer error ack signal\n");
diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
index 217bb630f8f9..1d23e2771dba 100644
--- a/arch/powerpc/kernel/vdso32/datapage.S
+++ b/arch/powerpc/kernel/vdso32/datapage.S
@@ -47,7 +47,6 @@ V_FUNCTION_END(__kernel_get_syscall_map)
*
* returns the timebase frequency in HZ
*/
-#ifndef CONFIG_PPC_BOOK3S_601
V_FUNCTION_BEGIN(__kernel_get_tbfreq)
.cfi_startproc
mflr r12
@@ -60,4 +59,3 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
blr
.cfi_endproc
V_FUNCTION_END(__kernel_get_tbfreq)
-#endif
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
index 5206c2eb2a1d..7eadac74c7f9 100644
--- a/arch/powerpc/kernel/vdso32/vdso32.lds.S
+++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
@@ -144,13 +144,11 @@ VERSION
__kernel_datapage_offset;
__kernel_get_syscall_map;
-#ifndef CONFIG_PPC_BOOK3S_601
__kernel_gettimeofday;
__kernel_clock_gettime;
__kernel_clock_getres;
__kernel_time;
__kernel_get_tbfreq;
-#endif
__kernel_sync_dicache;
__kernel_sync_dicache_p5;
__kernel_sigtramp32;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 4ba06a2a306c..3bd3118c7633 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3530,6 +3530,13 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
*/
asm volatile("eieio; tlbsync; ptesync");
+ /*
+ * cp_abort is required if the processor supports local copy-paste
+ * to clear the copy buffer that was under control of the guest.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ asm volatile(PPC_CP_ABORT);
+
mtspr(SPRN_LPID, vcpu->kvm->arch.host_lpid); /* restore host LPID */
isync();
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 799d6d0f4ead..cd9995ee8441 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1831,6 +1831,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_RADIX_PREFETCH_BUG)
#endif /* CONFIG_PPC_RADIX_MMU */
/*
+ * cp_abort is required if the processor supports local copy-paste
+ * to clear the copy buffer that was under control of the guest.
+ */
+BEGIN_FTR_SECTION
+ PPC_CP_ABORT
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
+
+ /*
* POWER7/POWER8 guest -> host partition switch code.
* We don't have to lock against tlbies but we do
* have to coordinate the hardware threads.
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 8c3934ea6220..2333625b5e31 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -21,21 +21,18 @@
static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr,
struct ppc_inst *patch_addr)
{
- int err = 0;
-
- if (!ppc_inst_prefixed(instr)) {
- __put_user_asm(ppc_inst_val(instr), patch_addr, err, "stw");
- } else {
- __put_user_asm(ppc_inst_as_u64(instr), patch_addr, err, "std");
- }
-
- if (err)
- return err;
+ if (!ppc_inst_prefixed(instr))
+ __put_user_asm_goto(ppc_inst_val(instr), patch_addr, failed, "stw");
+ else
+ __put_user_asm_goto(ppc_inst_as_u64(instr), patch_addr, failed, "std");
asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr),
"r" (exec_addr));
return 0;
+
+failed:
+ return -EFAULT;
}
int raw_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr)
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index caee8cc77e19..e9dcaba9a4f8 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -219,10 +219,13 @@ static nokprobe_inline unsigned long mlsd_8lsd_ea(unsigned int instr,
ea += regs->gpr[ra];
else if (!prefix_r && !ra)
; /* Leave ea as is */
- else if (prefix_r && !ra)
+ else if (prefix_r)
ea += regs->nip;
- else if (prefix_r && ra)
- ; /* Invalid form. Should already be checked for by caller! */
+
+ /*
+ * (prefix_r && ra) is an invalid form. Should already be
+ * checked for by caller!
+ */
return ea;
}
diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S
index 1690d369688b..b2c912e517b9 100644
--- a/arch/powerpc/mm/book3s32/hash_low.S
+++ b/arch/powerpc/mm/book3s32/hash_low.S
@@ -15,6 +15,7 @@
*/
#include <linux/pgtable.h>
+#include <linux/init.h>
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/cputable.h>
@@ -199,11 +200,9 @@ _GLOBAL(add_hash_page)
* covered by a BAT). -- paulus
*/
mfmsr r9
- SYNC
rlwinm r0,r9,0,17,15 /* clear bit 16 (MSR_EE) */
rlwinm r0,r0,0,28,26 /* clear MSR_DR */
mtmsr r0
- SYNC_601
isync
#ifdef CONFIG_SMP
@@ -262,7 +261,6 @@ _GLOBAL(add_hash_page)
/* reenable interrupts and DR */
mtmsr r9
- SYNC_601
isync
lwz r0,4(r1)
@@ -287,9 +285,9 @@ _ASM_NOKPROBE_SYMBOL(add_hash_page)
*
* For speed, 4 of the instructions get patched once the size and
* physical address of the hash table are known. These definitions
- * of Hash_base and Hash_bits below are just an example.
+ * of Hash_base and Hash_bits below are for the early hash table.
*/
-Hash_base = 0xc0180000
+Hash_base = early_hash
Hash_bits = 12 /* e.g. 256kB hash table */
Hash_msk = (((1 << Hash_bits) - 1) * 64)
@@ -310,6 +308,7 @@ Hash_msk = (((1 << Hash_bits) - 1) * 64)
#define HASH_LEFT 31-(LG_PTEG_SIZE+Hash_bits-1)
#define HASH_RIGHT 31-LG_PTEG_SIZE
+__REF
_GLOBAL(create_hpte)
/* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */
rlwinm r8,r5,32-9,30,30 /* _PAGE_RW -> PP msb */
@@ -476,6 +475,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
sync /* make sure pte updates get to memory */
blr
+ .previous
_ASM_NOKPROBE_SYMBOL(create_hpte)
.section .bss
@@ -496,6 +496,7 @@ htab_hash_searches:
*
* We assume that there is a hash table in use (Hash != 0).
*/
+__REF
_GLOBAL(flush_hash_pages)
/*
* We disable interrupts here, even on UP, because we want
@@ -506,11 +507,9 @@ _GLOBAL(flush_hash_pages)
* covered by a BAT). -- paulus
*/
mfmsr r10
- SYNC
rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */
rlwinm r0,r0,0,28,26 /* clear MSR_DR */
mtmsr r0
- SYNC_601
isync
/* First find a PTE in the range that has _PAGE_HASHPTE set */
@@ -629,9 +628,9 @@ _GLOBAL(flush_hash_pages)
#endif
19: mtmsr r10
- SYNC_601
isync
blr
+ .previous
EXPORT_SYMBOL(flush_hash_pages)
_ASM_NOKPROBE_SYMBOL(flush_hash_pages)
@@ -643,11 +642,9 @@ _GLOBAL(_tlbie)
lwz r8,TASK_CPU(r2)
oris r8,r8,11
mfmsr r10
- SYNC
rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */
rlwinm r0,r0,0,28,26 /* clear DR */
mtmsr r0
- SYNC_601
isync
lis r9,mmu_hash_lock@h
ori r9,r9,mmu_hash_lock@l
@@ -664,7 +661,6 @@ _GLOBAL(_tlbie)
li r0,0
stw r0,0(r9) /* clear mmu_hash_lock */
mtmsr r10
- SYNC_601
isync
#else /* CONFIG_SMP */
tlbie r3
@@ -681,11 +677,9 @@ _GLOBAL(_tlbia)
lwz r8,TASK_CPU(r2)
oris r8,r8,10
mfmsr r10
- SYNC
rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */
rlwinm r0,r0,0,28,26 /* clear DR */
mtmsr r0
- SYNC_601
isync
lis r9,mmu_hash_lock@h
ori r9,r9,mmu_hash_lock@l
@@ -709,7 +703,6 @@ _GLOBAL(_tlbia)
li r0,0
stw r0,0(r9) /* clear mmu_hash_lock */
mtmsr r10
- SYNC_601
isync
#endif /* CONFIG_SMP */
blr
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index d426eaf76bb0..a59e7ec98180 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -31,6 +31,8 @@
#include <mm/mmu_decl.h>
+u8 __initdata early_hash[SZ_256K] __aligned(SZ_256K) = {0};
+
struct hash_pte *Hash;
static unsigned long Hash_size, Hash_mask;
unsigned long _SDR1;
@@ -73,23 +75,13 @@ unsigned long p_block_mapped(phys_addr_t pa)
static int find_free_bat(void)
{
int b;
+ int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
- if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) {
- for (b = 0; b < 4; b++) {
- struct ppc_bat *bat = BATS[b];
-
- if (!(bat[0].batl & 0x40))
- return b;
- }
- } else {
- int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
+ for (b = 0; b < n; b++) {
+ struct ppc_bat *bat = BATS[b];
- for (b = 0; b < n; b++) {
- struct ppc_bat *bat = BATS[b];
-
- if (!(bat[1].batu & 3))
- return b;
- }
+ if (!(bat[1].batu & 3))
+ return b;
}
return -1;
}
@@ -97,7 +89,7 @@ static int find_free_bat(void)
/*
* This function calculates the size of the larger block usable to map the
* beginning of an area based on the start address and size of that area:
- * - max block size is 8M on 601 and 256 on other 6xx.
+ * - max block size is 256 on 6xx.
* - base address must be aligned to the block size. So the maximum block size
* is identified by the lowest bit set to 1 in the base address (for instance
* if base is 0x16000000, max size is 0x02000000).
@@ -106,7 +98,7 @@ static int find_free_bat(void)
*/
static unsigned int block_size(unsigned long base, unsigned long top)
{
- unsigned int max_size = IS_ENABLED(CONFIG_PPC_BOOK3S_601) ? SZ_8M : SZ_256M;
+ unsigned int max_size = SZ_256M;
unsigned int base_shift = (ffs(base) - 1) & 31;
unsigned int block_shift = (fls(top - base) - 1) & 31;
@@ -117,7 +109,6 @@ static unsigned int block_size(unsigned long base, unsigned long top)
* Set up one of the IBAT (block address translation) register pairs.
* The parameters are not checked; in particular size must be a power
* of 2 between 128k and 256M.
- * Only for 603+ ...
*/
static void setibat(int index, unsigned long virt, phys_addr_t phys,
unsigned int size, pgprot_t prot)
@@ -214,9 +205,6 @@ void mmu_mark_initmem_nx(void)
unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET;
unsigned long size;
- if (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
- return;
-
for (i = 0; i < nb - 1 && base < top && top - base > (128 << 10);) {
size = block_size(base, top);
setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT);
@@ -253,9 +241,6 @@ void mmu_mark_rodata_ro(void)
int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
int i;
- if (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
- return;
-
for (i = 0; i < nb; i++) {
struct ppc_bat *bat = BATS[i];
@@ -294,35 +279,22 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys,
flags &= ~_PAGE_COHERENT;
bl = (size >> 17) - 1;
- if (!IS_ENABLED(CONFIG_PPC_BOOK3S_601)) {
- /* 603, 604, etc. */
- /* Do DBAT first */
- wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
- | _PAGE_COHERENT | _PAGE_GUARDED);
- wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX;
- bat[1].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */
- bat[1].batl = BAT_PHYS_ADDR(phys) | wimgxpp;
- if (flags & _PAGE_USER)
- bat[1].batu |= 1; /* Vp = 1 */
- if (flags & _PAGE_GUARDED) {
- /* G bit must be zero in IBATs */
- flags &= ~_PAGE_EXEC;
- }
- if (flags & _PAGE_EXEC)
- bat[0] = bat[1];
- else
- bat[0].batu = bat[0].batl = 0;
- } else {
- /* 601 cpu */
- if (bl > BL_8M)
- bl = BL_8M;
- wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
- | _PAGE_COHERENT);
- wimgxpp |= (flags & _PAGE_RW)?
- ((flags & _PAGE_USER)? PP_RWRW: PP_RWXX): PP_RXRX;
- bat->batu = virt | wimgxpp | 4; /* Ks=0, Ku=1 */
- bat->batl = phys | bl | 0x40; /* V=1 */
+ /* Do DBAT first */
+ wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
+ | _PAGE_COHERENT | _PAGE_GUARDED);
+ wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX;
+ bat[1].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */
+ bat[1].batl = BAT_PHYS_ADDR(phys) | wimgxpp;
+ if (flags & _PAGE_USER)
+ bat[1].batu |= 1; /* Vp = 1 */
+ if (flags & _PAGE_GUARDED) {
+ /* G bit must be zero in IBATs */
+ flags &= ~_PAGE_EXEC;
}
+ if (flags & _PAGE_EXEC)
+ bat[0] = bat[1];
+ else
+ bat[0].batu = bat[0].batl = 0;
bat_addrs[index].start = virt;
bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1;
@@ -425,15 +397,6 @@ void __init MMU_init_hw(void)
hash_mb2 = hash_mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg;
if (lg_n_hpteg > 16)
hash_mb2 = 16 - LG_HPTEG_SIZE;
-
- /*
- * When KASAN is selected, there is already an early temporary hash
- * table and the switch to the final hash table is done later.
- */
- if (IS_ENABLED(CONFIG_KASAN))
- return;
-
- MMU_init_hw_patch();
}
void __init MMU_init_hw_patch(void)
@@ -441,6 +404,9 @@ void __init MMU_init_hw_patch(void)
unsigned int hmask = Hash_mask >> (16 - LG_HPTEG_SIZE);
unsigned int hash = (unsigned int)Hash - PAGE_OFFSET;
+ if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ return;
+
if (ppc_md.progress)
ppc_md.progress("hash:patch", 0x345);
if (ppc_md.progress)
@@ -474,11 +440,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
*/
BUG_ON(first_memblock_base != 0);
- /* 601 can only access 16MB at the moment */
- if (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
- memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01000000));
- else /* Anything else has 256M mapped */
- memblock_set_current_limit(min_t(u64, first_memblock_size, 0x10000000));
+ memblock_set_current_limit(min_t(u64, first_memblock_size, SZ_256M));
}
void __init print_system_hash_info(void)
diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c
index cf20e5229ce1..0203cdf48c54 100644
--- a/arch/powerpc/mm/book3s64/hash_native.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
@@ -82,7 +82,7 @@ static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is)
for (set = 0; set < num_sets; set++)
tlbiel_hash_set_isa206(set, is);
- asm volatile("ptesync": : :"memory");
+ ppc_after_tlbiel_barrier();
}
static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
@@ -110,7 +110,7 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
*/
tlbiel_hash_set_isa300(0, is, 0, 2, 1);
- asm volatile("ptesync": : :"memory");
+ ppc_after_tlbiel_barrier();
asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}
@@ -303,7 +303,7 @@ static inline void tlbie(unsigned long vpn, int psize, int apsize,
asm volatile("ptesync": : :"memory");
if (use_local) {
__tlbiel(vpn, psize, apsize, ssize);
- asm volatile("ptesync": : :"memory");
+ ppc_after_tlbiel_barrier();
} else {
__tlbie(vpn, psize, apsize, ssize);
fixup_tlbie_vpn(vpn, psize, apsize, ssize);
@@ -879,7 +879,7 @@ static void native_flush_hash_range(unsigned long number, int local)
__tlbiel(vpn, psize, psize, ssize);
} pte_iterate_hashed_end();
}
- asm volatile("ptesync":::"memory");
+ ppc_after_tlbiel_barrier();
} else {
int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index b830adee51f5..24702c0a92e0 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -260,8 +260,12 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
DBG("htab_bolt_mapping(%lx..%lx -> %lx (%lx,%d,%d)\n",
vstart, vend, pstart, prot, psize, ssize);
- for (vaddr = vstart, paddr = pstart; vaddr < vend;
- vaddr += step, paddr += step) {
+ /* Carefully map only the possible range */
+ vaddr = ALIGN(vstart, step);
+ paddr = ALIGN(pstart, step);
+ vend = ALIGN_DOWN(vend, step);
+
+ for (; vaddr < vend; vaddr += step, paddr += step) {
unsigned long hash, hpteg;
unsigned long vsid = get_kernel_vsid(vaddr, ssize);
unsigned long vpn = hpt_vpn(vaddr, vsid, ssize);
@@ -343,7 +347,9 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend,
if (!mmu_hash_ops.hpte_removebolted)
return -ENODEV;
- for (vaddr = vstart; vaddr < vend; vaddr += step) {
+ /* Unmap the full range specificied */
+ vaddr = ALIGN_DOWN(vstart, step);
+ for (;vaddr < vend; vaddr += step) {
rc = mmu_hash_ops.hpte_removebolted(vaddr, psize, ssize);
if (rc == -ENOENT) {
ret = -ENOENT;
diff --git a/arch/powerpc/mm/book3s64/internal.h b/arch/powerpc/mm/book3s64/internal.h
index 7eda0d30d765..c12d78ee42f5 100644
--- a/arch/powerpc/mm/book3s64/internal.h
+++ b/arch/powerpc/mm/book3s64/internal.h
@@ -13,4 +13,6 @@ static inline bool stress_slb(void)
return static_branch_unlikely(&stress_slb_key);
}
+void slb_setup_new_exec(void);
+
#endif /* ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H */
diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c
index 0ba30b8b935b..1c54821de7bf 100644
--- a/arch/powerpc/mm/book3s64/mmu_context.c
+++ b/arch/powerpc/mm/book3s64/mmu_context.c
@@ -21,6 +21,8 @@
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
+#include "internal.h"
+
static DEFINE_IDA(mmu_context_ida);
static int alloc_context_id(int min_id, int max_id)
@@ -48,8 +50,6 @@ int hash__alloc_context_id(void)
}
EXPORT_SYMBOL_GPL(hash__alloc_context_id);
-void slb_setup_new_exec(void);
-
static int realloc_context_ids(mm_context_t *ctx)
{
int i, id;
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index cc72666e891a..3adcf730f478 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -34,7 +34,7 @@
unsigned int mmu_pid_bits;
unsigned int mmu_base_pid;
-unsigned int radix_mem_block_size __ro_after_init;
+unsigned long radix_mem_block_size __ro_after_init;
static __ref void *early_alloc_pgtable(unsigned long size, int nid,
unsigned long region_start, unsigned long region_end)
@@ -276,6 +276,7 @@ static int __meminit create_physical_mapping(unsigned long start,
int psize;
start = ALIGN(start, PAGE_SIZE);
+ end = ALIGN_DOWN(end, PAGE_SIZE);
for (addr = start; addr < end; addr += mapping_size) {
unsigned long gap, previous_size;
int rc;
@@ -497,7 +498,7 @@ static int __init probe_memory_block_size(unsigned long node, const char *uname,
depth, void *data)
{
unsigned long *mem_block_size = (unsigned long *)data;
- const __be64 *prop;
+ const __be32 *prop;
int len;
if (depth != 1)
@@ -507,13 +508,14 @@ static int __init probe_memory_block_size(unsigned long node, const char *uname,
return 0;
prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
- if (!prop || len < sizeof(__be64))
+
+ if (!prop || len < dt_root_size_cells * sizeof(__be32))
/*
* Nothing in the device tree
*/
*mem_block_size = MIN_MEMORY_BLOCK_SIZE;
else
- *mem_block_size = be64_to_cpup(prop);
+ *mem_block_size = of_read_number(prop, dt_root_size_cells);
return 1;
}
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 0d233763441f..b487b489d4b6 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -65,7 +65,7 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
for (set = 1; set < num_sets; set++)
tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
- asm volatile("ptesync": : :"memory");
+ ppc_after_tlbiel_barrier();
}
void radix__tlbiel_all(unsigned int action)
@@ -296,7 +296,7 @@ static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
/* For PWC, only one flush is needed */
if (ric == RIC_FLUSH_PWC) {
- asm volatile("ptesync": : :"memory");
+ ppc_after_tlbiel_barrier();
return;
}
@@ -304,7 +304,7 @@ static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
__tlbiel_pid(pid, set, RIC_FLUSH_TLB);
- asm volatile("ptesync": : :"memory");
+ ppc_after_tlbiel_barrier();
asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
}
@@ -431,7 +431,7 @@ static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
asm volatile("ptesync": : :"memory");
__tlbiel_va(va, pid, ap, ric);
- asm volatile("ptesync": : :"memory");
+ ppc_after_tlbiel_barrier();
}
static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
@@ -442,7 +442,7 @@ static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
if (also_pwc)
__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
__tlbiel_va_range(start, end, pid, page_size, psize);
- asm volatile("ptesync": : :"memory");
+ ppc_after_tlbiel_barrier();
}
static inline void __tlbie_va_range(unsigned long start, unsigned long end,
@@ -645,19 +645,29 @@ static void do_exit_flush_lazy_tlb(void *arg)
struct mm_struct *mm = arg;
unsigned long pid = mm->context.id;
+ /*
+ * A kthread could have done a mmget_not_zero() after the flushing CPU
+ * checked mm_is_singlethreaded, and be in the process of
+ * kthread_use_mm when interrupted here. In that case, current->mm will
+ * be set to mm, because kthread_use_mm() setting ->mm and switching to
+ * the mm is done with interrupts off.
+ */
if (current->mm == mm)
- return; /* Local CPU */
+ goto out_flush;
if (current->active_mm == mm) {
- /*
- * Must be a kernel thread because sender is single-threaded.
- */
- BUG_ON(current->mm);
+ WARN_ON_ONCE(current->mm != NULL);
+ /* Is a kernel thread and is using mm as the lazy tlb */
mmgrab(&init_mm);
- switch_mm(mm, &init_mm, current);
current->active_mm = &init_mm;
+ switch_mm_irqs_off(mm, &init_mm, current);
mmdrop(mm);
}
+
+ atomic_dec(&mm->context.active_cpus);
+ cpumask_clear_cpu(smp_processor_id(), mm_cpumask(mm));
+
+out_flush:
_tlbiel_pid(pid, RIC_FLUSH_ALL);
}
@@ -672,7 +682,6 @@ static void exit_flush_lazy_tlbs(struct mm_struct *mm)
*/
smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
(void *)mm, 1);
- mm_reset_thread_local(mm);
}
void radix__flush_tlb_mm(struct mm_struct *mm)
@@ -940,7 +949,7 @@ is_local:
if (hflush)
__tlbiel_va_range(hstart, hend, pid,
PMD_SIZE, MMU_PAGE_2M);
- asm volatile("ptesync": : :"memory");
+ ppc_after_tlbiel_barrier();
} else if (cputlb_use_tlbie()) {
asm volatile("ptesync": : :"memory");
__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
index 156c38f89511..c30fcbfa0e32 100644
--- a/arch/powerpc/mm/book3s64/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -765,8 +765,8 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id)
if (id == LINEAR_MAP_REGION_ID) {
- /* We only support upto MAX_PHYSMEM_BITS */
- if ((ea & EA_MASK) > (1UL << MAX_PHYSMEM_BITS))
+ /* We only support upto H_MAX_PHYSMEM_BITS */
+ if ((ea & EA_MASK) > (1UL << H_MAX_PHYSMEM_BITS))
return -EFAULT;
flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp;
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index b2eeea39684c..9af3832c9d8d 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -389,10 +389,8 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop)
if (!drmem_info->lmbs)
return;
- for_each_drmem_lmb(lmb) {
+ for_each_drmem_lmb(lmb)
read_drconf_v1_cell(lmb, &prop);
- lmb_set_nid(lmb);
- }
}
static void __init init_drmem_v2_lmbs(const __be32 *prop)
@@ -437,8 +435,6 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop)
lmb->aa_index = dr_cell.aa_index;
lmb->flags = dr_cell.flags;
-
- lmb_set_nid(lmb);
}
}
}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 26292544630f..36c3800769fb 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -180,7 +180,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
if (!hpdp)
return NULL;
- if (IS_ENABLED(CONFIG_PPC_8xx) && sz == SZ_512K)
+ if (IS_ENABLED(CONFIG_PPC_8xx) && pshift < PMD_SHIFT)
return pte_alloc_map(mm, (pmd_t *)hpdp, addr);
BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
@@ -330,10 +330,24 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
get_hugepd_cache_index(pdshift - shift));
}
-static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long addr)
+static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+ unsigned long addr, unsigned long end,
+ unsigned long floor, unsigned long ceiling)
{
+ unsigned long start = addr;
pgtable_t token = pmd_pgtable(*pmd);
+ start &= PMD_MASK;
+ if (start < floor)
+ return;
+ if (ceiling) {
+ ceiling &= PMD_MASK;
+ if (!ceiling)
+ return;
+ }
+ if (end - 1 > ceiling - 1)
+ return;
+
pmd_clear(pmd);
pte_free_tlb(tlb, token, addr);
mm_dec_nr_ptes(tlb->mm);
@@ -363,7 +377,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
*/
WARN_ON(!IS_ENABLED(CONFIG_PPC_8xx));
- hugetlb_free_pte_range(tlb, pmd, addr);
+ hugetlb_free_pte_range(tlb, pmd, addr, end, floor, ceiling);
continue;
}
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 8459056cce67..386be136026e 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -162,16 +162,16 @@ static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node)
return next++;
}
-static __meminit void vmemmap_list_populate(unsigned long phys,
- unsigned long start,
- int node)
+static __meminit int vmemmap_list_populate(unsigned long phys,
+ unsigned long start,
+ int node)
{
struct vmemmap_backing *vmem_back;
vmem_back = vmemmap_list_alloc(node);
if (unlikely(!vmem_back)) {
- WARN_ON(1);
- return;
+ pr_debug("vmemap list allocation failed\n");
+ return -ENOMEM;
}
vmem_back->phys = phys;
@@ -179,6 +179,7 @@ static __meminit void vmemmap_list_populate(unsigned long phys,
vmem_back->list = vmemmap_list;
vmemmap_list = vmem_back;
+ return 0;
}
static bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start,
@@ -199,6 +200,7 @@ static bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long star
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{
+ bool altmap_alloc;
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
/* Align to the page size of the linear mapping. */
@@ -228,13 +230,32 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
p = vmemmap_alloc_block_buf(page_size, node, altmap);
if (!p)
pr_debug("altmap block allocation failed, falling back to system memory");
+ else
+ altmap_alloc = true;
}
- if (!p)
+ if (!p) {
p = vmemmap_alloc_block_buf(page_size, node, NULL);
+ altmap_alloc = false;
+ }
if (!p)
return -ENOMEM;
- vmemmap_list_populate(__pa(p), start, node);
+ if (vmemmap_list_populate(__pa(p), start, node)) {
+ /*
+ * If we don't populate vmemap list, we don't have
+ * the ability to free the allocated vmemmap
+ * pages in section_deactivate. Hence free them
+ * here.
+ */
+ int nr_pfns = page_size >> PAGE_SHIFT;
+ unsigned long page_order = get_order(page_size);
+
+ if (altmap_alloc)
+ vmem_altmap_free(altmap, nr_pfns);
+ else
+ free_pages((unsigned long)p, page_order);
+ return -ENOMEM;
+ }
pr_debug(" * %016lx..%016lx allocated at %p\n",
start, start + page_size, p);
@@ -264,10 +285,8 @@ static unsigned long vmemmap_list_free(unsigned long start)
vmem_back_prev = vmem_back;
}
- if (unlikely(!vmem_back)) {
- WARN_ON(1);
+ if (unlikely(!vmem_back))
return 0;
- }
/* remove it from vmemmap_list */
if (vmem_back == vmemmap_list) /* remove head */
diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
index 26fda3203320..cf8770b1a692 100644
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -127,8 +127,7 @@ void __init kasan_mmu_init(void)
{
int ret;
- if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ||
- IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
+ if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END);
if (ret)
@@ -140,10 +139,10 @@ void __init kasan_init(void)
{
phys_addr_t base, end;
u64 i;
+ int ret;
for_each_mem_range(i, &base, &end) {
phys_addr_t top = min(end, total_lowmem);
- int ret;
if (base >= top)
continue;
@@ -153,6 +152,13 @@ void __init kasan_init(void)
panic("kasan: kasan_init_region() failed");
}
+ if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
+ ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+ if (ret)
+ panic("kasan: kasan_init_shadow_page_tables() failed");
+ }
+
kasan_remap_early_shadow_ro();
clear_page(kasan_early_shadow_page);
@@ -168,22 +174,6 @@ void __init kasan_late_init(void)
kasan_unmap_early_shadow_vmalloc();
}
-#ifdef CONFIG_PPC_BOOK3S_32
-u8 __initdata early_hash[256 << 10] __aligned(256 << 10) = {0};
-
-static void __init kasan_early_hash_table(void)
-{
- unsigned int hash = __pa(early_hash);
-
- modify_instruction_site(&patch__hash_page_A0, 0xffff, hash >> 16);
- modify_instruction_site(&patch__flush_hash_A0, 0xffff, hash >> 16);
-
- Hash = (struct hash_pte *)early_hash;
-}
-#else
-static void __init kasan_early_hash_table(void) {}
-#endif
-
void __init kasan_early_init(void)
{
unsigned long addr = KASAN_SHADOW_START;
@@ -199,7 +189,4 @@ void __init kasan_early_init(void)
next = pgd_addr_end(addr, end);
pmd_populate_kernel(&init_mm, pmd, kasan_early_shadow_pte);
} while (pmd++, addr = next, addr != end);
-
- if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
- kasan_early_hash_table();
}
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 5e2e7c0a8f1a..01ec2a252f09 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -49,6 +49,7 @@
#include <asm/swiotlb.h>
#include <asm/rtas.h>
#include <asm/kasan.h>
+#include <asm/svm.h>
#include <mm/mmu_decl.h>
@@ -283,7 +284,10 @@ void __init mem_init(void)
* back to to-down.
*/
memblock_set_bottom_up(true);
- swiotlb_init(0);
+ if (is_secure_guest())
+ svm_swiotlb_init();
+ else
+ swiotlb_init(0);
#endif
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index d2b37146ae6c..231ca95f9ffb 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -244,13 +244,6 @@ void set_context(unsigned long id, pgd_t *pgd)
mb();
}
-void flush_instruction_cache(void)
-{
- isync();
- mtspr(SPRN_IC_CST, IDC_INVALL);
- isync();
-}
-
#ifdef CONFIG_PPC_KUEP
void __init setup_kuep(bool disabled)
{
diff --git a/arch/powerpc/mm/nohash/fsl_booke.c b/arch/powerpc/mm/nohash/fsl_booke.c
index 0c294827d6e5..36bda962d3b3 100644
--- a/arch/powerpc/mm/nohash/fsl_booke.c
+++ b/arch/powerpc/mm/nohash/fsl_booke.c
@@ -219,6 +219,22 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
return tlbcam_addrs[tlbcam_index - 1].limit - PAGE_OFFSET + 1;
}
+void flush_instruction_cache(void)
+{
+ unsigned long tmp;
+
+ if (IS_ENABLED(CONFIG_E200)) {
+ tmp = mfspr(SPRN_L1CSR0);
+ tmp |= L1CSR0_CFI | L1CSR0_CLFC;
+ mtspr(SPRN_L1CSR0, tmp);
+ } else {
+ tmp = mfspr(SPRN_L1CSR1);
+ tmp |= L1CSR1_ICFI | L1CSR1_ICLFR;
+ mtspr(SPRN_L1CSR1, tmp);
+ }
+ isync();
+}
+
/*
* MMU_init_hw does the chip-specific initialization of the MMU hardware.
*/
diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
index 14514585db98..5872f69141d5 100644
--- a/arch/powerpc/mm/nohash/tlb.c
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -83,16 +83,12 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
};
#elif defined(CONFIG_PPC_8xx)
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
- /* we only manage 4k and 16k pages as normal pages */
-#ifdef CONFIG_PPC_4K_PAGES
[MMU_PAGE_4K] = {
.shift = 12,
},
-#else
[MMU_PAGE_16K] = {
.shift = 14,
},
-#endif
[MMU_PAGE_512K] = {
.shift = 19,
},
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index f4e20d8e6c02..63f61d8b55e5 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -430,7 +430,7 @@ static int of_get_assoc_arrays(struct assoc_arrays *aa)
* This is like of_node_to_nid_single() for memory represented in the
* ibm,dynamic-reconfiguration-memory node.
*/
-static int of_drconf_to_nid_single(struct drmem_lmb *lmb)
+int of_drconf_to_nid_single(struct drmem_lmb *lmb)
{
struct assoc_arrays aa = { .arrays = NULL };
int default_nid = NUMA_NO_NODE;
@@ -507,6 +507,11 @@ static int numa_setup_cpu(unsigned long lcpu)
int fcpu = cpu_first_thread_sibling(lcpu);
int nid = NUMA_NO_NODE;
+ if (!cpu_present(lcpu)) {
+ set_cpu_numa_node(lcpu, first_online_node);
+ return first_online_node;
+ }
+
/*
* If a valid cpu-to-node mapping is already available, use it
* directly instead of querying the firmware, since it represents
@@ -723,21 +728,22 @@ static int __init parse_numa_properties(void)
*/
for_each_present_cpu(i) {
struct device_node *cpu;
- int nid;
-
- cpu = of_get_cpu_node(i, NULL);
- BUG_ON(!cpu);
- nid = of_node_to_nid_single(cpu);
- of_node_put(cpu);
+ int nid = vphn_get_nid(i);
/*
* Don't fall back to default_nid yet -- we will plug
* cpus into nodes once the memory scan has discovered
* the topology.
*/
- if (nid < 0)
- continue;
- node_set_online(nid);
+ if (nid == NUMA_NO_NODE) {
+ cpu = of_get_cpu_node(i, NULL);
+ BUG_ON(!cpu);
+ nid = of_node_to_nid_single(cpu);
+ of_node_put(cpu);
+ }
+
+ if (likely(nid > 0))
+ node_set_online(nid);
}
get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
@@ -888,7 +894,9 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
static void __init find_possible_nodes(void)
{
struct device_node *rtas;
- u32 numnodes, i;
+ const __be32 *domains;
+ int prop_length, max_nodes;
+ u32 i;
if (!numa_enabled)
return;
@@ -897,16 +905,31 @@ static void __init find_possible_nodes(void)
if (!rtas)
return;
- if (of_property_read_u32_index(rtas,
- "ibm,max-associativity-domains",
- min_common_depth, &numnodes))
- goto out;
+ /*
+ * ibm,current-associativity-domains is a fairly recent property. If
+ * it doesn't exist, then fallback on ibm,max-associativity-domains.
+ * Current denotes what the platform can support compared to max
+ * which denotes what the Hypervisor can support.
+ */
+ domains = of_get_property(rtas, "ibm,current-associativity-domains",
+ &prop_length);
+ if (!domains) {
+ domains = of_get_property(rtas, "ibm,max-associativity-domains",
+ &prop_length);
+ if (!domains)
+ goto out;
+ }
- for (i = 0; i < numnodes; i++) {
+ max_nodes = of_read_number(&domains[min_common_depth], 1);
+ for (i = 0; i < max_nodes; i++) {
if (!node_possible(i))
node_set(i, node_possible_map);
}
+ prop_length /= sizeof(int);
+ if (prop_length > min_common_depth + 2)
+ coregroup_enabled = 1;
+
out:
of_node_put(rtas);
}
@@ -915,6 +938,16 @@ void __init mem_topology_setup(void)
{
int cpu;
+ /*
+ * Linux/mm assumes node 0 to be online at boot. However this is not
+ * true on PowerPC, where node 0 is similar to any other node, it
+ * could be cpuless, memoryless node. So force node 0 to be offline
+ * for now. This will prevent cpuless, memoryless node 0 showing up
+ * unnecessarily as online. If a node has cpus or memory that need
+ * to be online, then node will anyway be marked online.
+ */
+ node_set_offline(0);
+
if (parse_numa_properties())
setup_nonnuma();
@@ -932,8 +965,17 @@ void __init mem_topology_setup(void)
reset_numa_cpu_lookup_table();
- for_each_present_cpu(cpu)
+ for_each_possible_cpu(cpu) {
+ /*
+ * Powerpc with CONFIG_NUMA always used to have a node 0,
+ * even if it was memoryless or cpuless. For all cpus that
+ * are possible but not present, cpu_to_node() would point
+ * to node 0. To remove a cpuless, memoryless dummy node,
+ * powerpc need to make sure all possible but not present
+ * cpu_to_node are set to a proper node.
+ */
numa_setup_cpu(cpu);
+ }
}
void __init initmem_init(void)
@@ -1200,6 +1242,31 @@ int find_and_online_cpu_nid(int cpu)
return new_nid;
}
+int cpu_to_coregroup_id(int cpu)
+{
+ __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
+ int index;
+
+ if (cpu < 0 || cpu > nr_cpu_ids)
+ return -1;
+
+ if (!coregroup_enabled)
+ goto out;
+
+ if (!firmware_has_feature(FW_FEATURE_VPHN))
+ goto out;
+
+ if (vphn_get_associativity(cpu, associativity))
+ goto out;
+
+ index = of_read_number(associativity, 1);
+ if (index > min_common_depth + 1)
+ return of_read_number(&associativity[index - 1], 1);
+
+out:
+ return cpu_to_core_id(cpu);
+}
+
static int topology_update_init(void)
{
topology_inited = 1;
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 9c0547d77af3..15555c95cebc 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -184,9 +184,6 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
*/
VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
- /* Add the pte bit when trying to set a pte */
- pte = pte_mkpte(pte);
-
/* Note: mm->context.id might not yet have been assigned as
* this context might not have been activated yet when this
* is called.
@@ -266,8 +263,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_
pmd_t *pmd = pmd_off(mm, addr);
pte_basic_t val;
pte_basic_t *entry = &ptep->pte;
- int num = is_hugepd(*((hugepd_t *)pmd)) ? 1 : SZ_512K / SZ_4K;
- int i;
+ int num, i;
/*
* Make sure hardware valid bit is not set. We don't do
@@ -275,11 +271,12 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_
*/
VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
- pte = pte_mkpte(pte);
-
pte = set_pte_filter(pte);
val = pte_val(pte);
+
+ num = number_of_cells_per_pte(pmd, val, 1);
+
for (i = 0; i < num; i++, entry++, val += SZ_4K)
*entry = val;
}
diff --git a/arch/powerpc/mm/ptdump/8xx.c b/arch/powerpc/mm/ptdump/8xx.c
index 8a797dcbf475..86da2a669680 100644
--- a/arch/powerpc/mm/ptdump/8xx.c
+++ b/arch/powerpc/mm/ptdump/8xx.c
@@ -11,8 +11,13 @@
static const struct flag_info flag_array[] = {
{
+#ifdef CONFIG_PPC_16K_PAGES
.mask = _PAGE_HUGE,
.val = _PAGE_HUGE,
+#else
+ .mask = _PAGE_SPS,
+ .val = _PAGE_SPS,
+#endif
.set = "huge",
.clear = " ",
}, {
diff --git a/arch/powerpc/mm/ptdump/bats.c b/arch/powerpc/mm/ptdump/bats.c
index e29b338d499f..c4c628b03cf8 100644
--- a/arch/powerpc/mm/ptdump/bats.c
+++ b/arch/powerpc/mm/ptdump/bats.c
@@ -12,62 +12,6 @@
#include "ptdump.h"
-static char *pp_601(int k, int pp)
-{
- if (pp == 0)
- return k ? " " : "rwx";
- if (pp == 1)
- return k ? "r x" : "rwx";
- if (pp == 2)
- return "rwx";
- return "r x";
-}
-
-static void bat_show_601(struct seq_file *m, int idx, u32 lower, u32 upper)
-{
- u32 blpi = upper & 0xfffe0000;
- u32 k = (upper >> 2) & 3;
- u32 pp = upper & 3;
- phys_addr_t pbn = PHYS_BAT_ADDR(lower);
- u32 bsm = lower & 0x3ff;
- u32 size = (bsm + 1) << 17;
-
- seq_printf(m, "%d: ", idx);
- if (!(lower & 0x40)) {
- seq_puts(m, " -\n");
- return;
- }
-
- seq_printf(m, "0x%08x-0x%08x ", blpi, blpi + size - 1);
-#ifdef CONFIG_PHYS_64BIT
- seq_printf(m, "0x%016llx ", pbn);
-#else
- seq_printf(m, "0x%08x ", pbn);
-#endif
- pt_dump_size(m, size);
-
- seq_printf(m, "Kernel %s User %s", pp_601(k & 2, pp), pp_601(k & 1, pp));
-
- seq_puts(m, lower & _PAGE_WRITETHRU ? "w " : " ");
- seq_puts(m, lower & _PAGE_NO_CACHE ? "i " : " ");
- seq_puts(m, lower & _PAGE_COHERENT ? "m " : " ");
- seq_puts(m, "\n");
-}
-
-#define BAT_SHOW_601(_m, _n, _l, _u) bat_show_601(_m, _n, mfspr(_l), mfspr(_u))
-
-static int bats_show_601(struct seq_file *m, void *v)
-{
- seq_puts(m, "---[ Block Address Translation ]---\n");
-
- BAT_SHOW_601(m, 0, SPRN_IBAT0L, SPRN_IBAT0U);
- BAT_SHOW_601(m, 1, SPRN_IBAT1L, SPRN_IBAT1U);
- BAT_SHOW_601(m, 2, SPRN_IBAT2L, SPRN_IBAT2U);
- BAT_SHOW_601(m, 3, SPRN_IBAT3L, SPRN_IBAT3U);
-
- return 0;
-}
-
static void bat_show_603(struct seq_file *m, int idx, u32 lower, u32 upper, bool is_d)
{
u32 bepi = upper & 0xfffe0000;
@@ -146,9 +90,6 @@ static int bats_show_603(struct seq_file *m, void *v)
static int bats_open(struct inode *inode, struct file *file)
{
- if (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
- return single_open(file, bats_show_601, NULL);
-
return single_open(file, bats_show_603, NULL);
}
diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c
index df59d0bb121f..489f993100d5 100644
--- a/arch/powerpc/oprofile/cell/spu_task_sync.c
+++ b/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -572,7 +572,7 @@ void spu_sync_buffer(int spu_num, unsigned int *samples,
* samples are recorded.
* No big deal -- so we just drop a few samples.
*/
- pr_debug("SPU_PROF: No cached SPU contex "
+ pr_debug("SPU_PROF: No cached SPU context "
"for SPU #%d. Dropping samples.\n", spu_num);
goto out;
}
diff --git a/arch/powerpc/perf/hv-gpci-requests.h b/arch/powerpc/perf/hv-gpci-requests.h
index e608f9db12dd..8965b4463d43 100644
--- a/arch/powerpc/perf/hv-gpci-requests.h
+++ b/arch/powerpc/perf/hv-gpci-requests.h
@@ -95,7 +95,7 @@ REQUEST(__field(0, 8, partition_id)
#define REQUEST_NAME system_performance_capabilities
#define REQUEST_NUM 0x40
-#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff"
+#define REQUEST_IDX_KIND "starting_index=0xffffffff"
#include I(REQUEST_BEGIN)
REQUEST(__field(0, 1, perf_collect_privileged)
__field(0x1, 1, capability_mask)
@@ -223,7 +223,7 @@ REQUEST(__field(0, 2, partition_id)
#define REQUEST_NAME system_hypervisor_times
#define REQUEST_NUM 0xF0
-#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff"
+#define REQUEST_IDX_KIND "starting_index=0xffffffff"
#include I(REQUEST_BEGIN)
REQUEST(__count(0, 8, time_spent_to_dispatch_virtual_processors)
__count(0x8, 8, time_spent_processing_virtual_processor_timers)
@@ -234,7 +234,7 @@ REQUEST(__count(0, 8, time_spent_to_dispatch_virtual_processors)
#define REQUEST_NAME system_tlbie_count_and_time
#define REQUEST_NUM 0xF4
-#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff"
+#define REQUEST_IDX_KIND "starting_index=0xffffffff"
#include I(REQUEST_BEGIN)
REQUEST(__count(0, 8, tlbie_instructions_issued)
/*
diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
index 6884d16ec19b..d48413e28c39 100644
--- a/arch/powerpc/perf/hv-gpci.c
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -48,6 +48,8 @@ EVENT_DEFINE_RANGE_FORMAT(length, config1, 24, 31);
/* u32, byte offset */
EVENT_DEFINE_RANGE_FORMAT(offset, config1, 32, 63);
+static cpumask_t hv_gpci_cpumask;
+
static struct attribute *format_attrs[] = {
&format_attr_request.attr,
&format_attr_starting_index.attr,
@@ -94,7 +96,15 @@ static ssize_t kernel_version_show(struct device *dev,
return sprintf(page, "0x%x\n", COUNTER_INFO_VERSION_CURRENT);
}
+static ssize_t cpumask_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return cpumap_print_to_pagebuf(true, buf, &hv_gpci_cpumask);
+}
+
static DEVICE_ATTR_RO(kernel_version);
+static DEVICE_ATTR_RO(cpumask);
+
HV_CAPS_ATTR(version, "0x%x\n");
HV_CAPS_ATTR(ga, "%d\n");
HV_CAPS_ATTR(expanded, "%d\n");
@@ -111,6 +121,15 @@ static struct attribute *interface_attrs[] = {
NULL,
};
+static struct attribute *cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static struct attribute_group cpumask_attr_group = {
+ .attrs = cpumask_attrs,
+};
+
static struct attribute_group interface_group = {
.name = "interface",
.attrs = interface_attrs,
@@ -120,20 +139,12 @@ static const struct attribute_group *attr_groups[] = {
&format_group,
&event_group,
&interface_group,
+ &cpumask_attr_group,
NULL,
};
-#define HGPCI_REQ_BUFFER_SIZE 4096
-#define HGPCI_MAX_DATA_BYTES \
- (HGPCI_REQ_BUFFER_SIZE - sizeof(struct hv_get_perf_counter_info_params))
-
static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t));
-struct hv_gpci_request_buffer {
- struct hv_get_perf_counter_info_params params;
- uint8_t bytes[HGPCI_MAX_DATA_BYTES];
-} __packed;
-
static unsigned long single_gpci_request(u32 req, u32 starting_index,
u16 secondary_index, u8 version_in, u32 offset, u8 length,
u64 *value)
@@ -275,6 +286,45 @@ static struct pmu h_gpci_pmu = {
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
+static int ppc_hv_gpci_cpu_online(unsigned int cpu)
+{
+ if (cpumask_empty(&hv_gpci_cpumask))
+ cpumask_set_cpu(cpu, &hv_gpci_cpumask);
+
+ return 0;
+}
+
+static int ppc_hv_gpci_cpu_offline(unsigned int cpu)
+{
+ int target;
+
+ /* Check if exiting cpu is used for collecting gpci events */
+ if (!cpumask_test_and_clear_cpu(cpu, &hv_gpci_cpumask))
+ return 0;
+
+ /* Find a new cpu to collect gpci events */
+ target = cpumask_last(cpu_active_mask);
+
+ if (target < 0 || target >= nr_cpu_ids) {
+ pr_err("hv_gpci: CPU hotplug init failed\n");
+ return -1;
+ }
+
+ /* Migrate gpci events to the new target */
+ cpumask_set_cpu(target, &hv_gpci_cpumask);
+ perf_pmu_migrate_context(&h_gpci_pmu, cpu, target);
+
+ return 0;
+}
+
+static int hv_gpci_cpu_hotplug_init(void)
+{
+ return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE,
+ "perf/powerpc/hv_gcpi:online",
+ ppc_hv_gpci_cpu_online,
+ ppc_hv_gpci_cpu_offline);
+}
+
static int hv_gpci_init(void)
{
int r;
@@ -295,6 +345,11 @@ static int hv_gpci_init(void)
return -ENODEV;
}
+ /* init cpuhotplug */
+ r = hv_gpci_cpu_hotplug_init();
+ if (r)
+ return r;
+
/* sampling not supported */
h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
diff --git a/arch/powerpc/perf/hv-gpci.h b/arch/powerpc/perf/hv-gpci.h
index a3053eda5dcc..4d108262bed7 100644
--- a/arch/powerpc/perf/hv-gpci.h
+++ b/arch/powerpc/perf/hv-gpci.h
@@ -2,33 +2,6 @@
#ifndef LINUX_POWERPC_PERF_HV_GPCI_H_
#define LINUX_POWERPC_PERF_HV_GPCI_H_
-#include <linux/types.h>
-
-/* From the document "H_GetPerformanceCounterInfo Interface" v1.07 */
-
-/* H_GET_PERF_COUNTER_INFO argument */
-struct hv_get_perf_counter_info_params {
- __be32 counter_request; /* I */
- __be32 starting_index; /* IO */
- __be16 secondary_index; /* IO */
- __be16 returned_values; /* O */
- __be32 detail_rc; /* O, only needed when called via *_norets() */
-
- /*
- * O, size each of counter_value element in bytes, only set for version
- * >= 0x3
- */
- __be16 cv_element_size;
-
- /* I, 0 (zero) for versions < 0x3 */
- __u8 counter_info_version_in;
-
- /* O, 0 (zero) if version < 0x3. Must be set to 0 when making hcall */
- __u8 counter_info_version_out;
- __u8 reserved[0xC];
- __u8 counter_value[];
-} __packed;
-
/*
* counter info version => fw version/reference (spec version)
*
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 62d0b54086f8..9ed4fcccf8a9 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -1426,8 +1426,6 @@ static void trace_imc_event_del(struct perf_event *event, int flags)
static int trace_imc_event_init(struct perf_event *event)
{
- struct task_struct *target;
-
if (event->attr.type != event->pmu->type)
return -ENOENT;
@@ -1458,7 +1456,6 @@ static int trace_imc_event_init(struct perf_event *event)
mutex_unlock(&imc_global_refc.lock);
event->hw.idx = -1;
- target = event->hw.target;
event->pmu->task_ctx_nr = perf_hw_context;
event->destroy = reset_global_refc;
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index 964437adec18..2848904df638 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -288,6 +288,15 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
mask |= CNST_PMC_MASK(pmc);
value |= CNST_PMC_VAL(pmc);
+
+ /*
+ * PMC5 and PMC6 are used to count cycles and instructions and
+ * they do not support most of the constraint bits. Add a check
+ * to exclude PMC5/6 from most of the constraints except for
+ * EBB/BHRB.
+ */
+ if (pmc >= 5)
+ goto ebb_bhrb;
}
if (pmc <= 4) {
@@ -357,6 +366,7 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
}
}
+ebb_bhrb:
if (!pmc && ebb)
/* EBB events must specify the PMC */
return -1;
diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
index 044de65e96b9..7025de5e60e7 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -13,6 +13,8 @@
#include <asm/firmware.h>
#include <asm/cputable.h>
+#include "internal.h"
+
#define EVENT_EBB_MASK 1ull
#define EVENT_EBB_SHIFT PERF_EVENT_CONFIG_EBB_SHIFT
#define EVENT_BHRB_MASK 1ull
diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
index 83148656b524..9dbe8f9b89b4 100644
--- a/arch/powerpc/perf/power10-pmu.c
+++ b/arch/powerpc/perf/power10-pmu.c
@@ -9,7 +9,6 @@
#define pr_fmt(fmt) "power10-pmu: " fmt
#include "isa207-common.h"
-#include "internal.h"
/*
* Raw event encoding for Power10:
diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c
index a62b2cd7914f..3e64b4a1511f 100644
--- a/arch/powerpc/perf/power5+-pmu.c
+++ b/arch/powerpc/perf/power5+-pmu.c
@@ -10,6 +10,8 @@
#include <asm/reg.h>
#include <asm/cputable.h>
+#include "internal.h"
+
/*
* Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3)
*/
diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c
index 8732b587cf71..017bb19b73fb 100644
--- a/arch/powerpc/perf/power5-pmu.c
+++ b/arch/powerpc/perf/power5-pmu.c
@@ -10,6 +10,8 @@
#include <asm/reg.h>
#include <asm/cputable.h>
+#include "internal.h"
+
/*
* Bits in event code for POWER5 (not POWER5++)
*/
diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c
index 0e318cf87129..189974478e9f 100644
--- a/arch/powerpc/perf/power6-pmu.c
+++ b/arch/powerpc/perf/power6-pmu.c
@@ -10,6 +10,8 @@
#include <asm/reg.h>
#include <asm/cputable.h>
+#include "internal.h"
+
/*
* Bits in event code for POWER6
*/
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index 5e0bf09cf077..bacfab104a1a 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -10,6 +10,8 @@
#include <asm/reg.h>
#include <asm/cputable.h>
+#include "internal.h"
+
/*
* Bits in event code for POWER7
*/
diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c
index d35223fb112c..7d78df97f272 100644
--- a/arch/powerpc/perf/ppc970-pmu.c
+++ b/arch/powerpc/perf/ppc970-pmu.c
@@ -9,6 +9,8 @@
#include <asm/reg.h>
#include <asm/cputable.h>
+#include "internal.h"
+
/*
* Bits in event code for PPC970
*/
diff --git a/arch/powerpc/platforms/44x/machine_check.c b/arch/powerpc/platforms/44x/machine_check.c
index 90ad6ac529d2..a5c898bb9bab 100644
--- a/arch/powerpc/platforms/44x/machine_check.c
+++ b/arch/powerpc/platforms/44x/machine_check.c
@@ -7,6 +7,7 @@
#include <linux/ptrace.h>
#include <asm/reg.h>
+#include <asm/cacheflush.h>
int machine_check_440A(struct pt_regs *regs)
{
diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c
index cba83eee685c..07f7e3ce67b5 100644
--- a/arch/powerpc/platforms/44x/ppc476.c
+++ b/arch/powerpc/platforms/44x/ppc476.c
@@ -86,8 +86,7 @@ static void __noreturn avr_reset_system(char *cmd)
avr_halt_system(AVR_PWRCTL_RESET);
}
-static int avr_probe(struct i2c_client *client,
- const struct i2c_device_id *id)
+static int avr_probe(struct i2c_client *client)
{
avr_i2c_client = client;
ppc_md.restart = avr_reset_system;
@@ -104,7 +103,7 @@ static struct i2c_driver avr_driver = {
.driver = {
.name = "akebono-avr",
},
- .probe = avr_probe,
+ .probe_new = avr_probe,
.id_table = avr_id,
};
diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
index 0967bdfb1691..409481016928 100644
--- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
+++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
@@ -142,7 +142,7 @@ static int mcu_gpiochip_remove(struct mcu *mcu)
return 0;
}
-static int mcu_probe(struct i2c_client *client, const struct i2c_device_id *id)
+static int mcu_probe(struct i2c_client *client)
{
struct mcu *mcu;
int ret;
@@ -221,7 +221,7 @@ static struct i2c_driver mcu_driver = {
.name = "mcu-mpc8349emitx",
.of_match_table = mcu_of_match_table,
},
- .probe = mcu_probe,
+ .probe_new = mcu_probe,
.remove = mcu_remove,
.id_table = mcu_ids,
};
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index fda108bae95f..c6df294054fe 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -112,7 +112,7 @@ static void mpc85xx_take_timebase(void)
local_irq_restore(flags);
}
-static void smp_85xx_mach_cpu_die(void)
+static void smp_85xx_cpu_offline_self(void)
{
unsigned int cpu = smp_processor_id();
@@ -506,7 +506,7 @@ void __init mpc85xx_smp_init(void)
if (qoriq_pm_ops) {
smp_85xx_ops.give_timebase = mpc85xx_give_timebase;
smp_85xx_ops.take_timebase = mpc85xx_take_timebase;
- ppc_md.cpu_die = smp_85xx_mach_cpu_die;
+ smp_85xx_ops.cpu_offline_self = smp_85xx_cpu_offline_self;
smp_85xx_ops.cpu_die = qoriq_cpu_kill;
}
#endif
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index fb7515b4fa9c..7a5e8f4541e3 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -199,21 +199,6 @@ source "drivers/cpuidle/Kconfig"
endmenu
-config PPC601_SYNC_FIX
- bool "Workarounds for PPC601 bugs"
- depends on PPC_BOOK3S_601 && PPC_PMAC
- default y
- help
- Some versions of the PPC601 (the first PowerPC chip) have bugs which
- mean that extra synchronization instructions are required near
- certain instructions, typically those that make major changes to the
- CPU state. These extra instructions reduce performance slightly.
- If you say N here, these extra instructions will not be included,
- resulting in a kernel which will run faster but may not run at all
- on some systems with the PPC601 chip.
-
- If in doubt, say Y here.
-
config TAU
bool "On-chip CPU temperature sensor support"
depends on PPC_BOOK3S_32
@@ -223,12 +208,11 @@ config TAU
temperature within 2-4 degrees Celsius. This option shows the current
on-die temperature in /proc/cpuinfo if the cpu supports it.
- Unfortunately, on some chip revisions, this sensor is very inaccurate
- and in many cases, does not work at all, so don't assume the cpu
- temp is actually what /proc/cpuinfo says it is.
+ Unfortunately, this sensor is very inaccurate when uncalibrated, so
+ don't assume the cpu temp is actually what /proc/cpuinfo says it is.
config TAU_INT
- bool "Interrupt driven TAU driver (DANGEROUS)"
+ bool "Interrupt driven TAU driver (EXPERIMENTAL)"
depends on TAU
help
The TAU supports an interrupt driven mode which causes an interrupt
@@ -236,12 +220,7 @@ config TAU_INT
to get notified the temp has exceeded a range. With this option off,
a timer is used to re-check the temperature periodically.
- However, on some cpus it appears that the TAU interrupt hardware
- is buggy and can cause a situation which would lead unexplained hard
- lockups.
-
- Unless you are extending the TAU driver, or enjoy kernel/hardware
- debugging, leave this option off.
+ If in doubt, say N here.
config TAU_AVERAGE
bool "Average high and low temp"
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 1dc9d3c81872..c194c4ae8bc7 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -20,7 +20,7 @@ choice
depends on PPC32
help
There are five families of 32 bit PowerPC chips supported.
- The most common ones are the desktop and server CPUs (601, 603,
+ The most common ones are the desktop and server CPUs (603,
604, 740, 750, 74xx) CPUs from Freescale and IBM, with their
embedded 512x/52xx/82xx/83xx/86xx counterparts.
The other embedded parts, namely 4xx, 8xx, e200 (55xx) and e500
@@ -30,7 +30,7 @@ choice
If unsure, select 52xx/6xx/7xx/74xx/82xx/83xx/86xx.
config PPC_BOOK3S_6xx
- bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx except 601"
+ bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx"
select PPC_BOOK3S_32
select PPC_FPU
select PPC_HAVE_PMU_SUPPORT
@@ -38,13 +38,6 @@ config PPC_BOOK3S_6xx
select PPC_HAVE_KUAP
select HAVE_ARCH_VMAP_STACK if !ADB_PMU
-config PPC_BOOK3S_601
- bool "PowerPC 601"
- select PPC_BOOK3S_32
- select PPC_FPU
- select PPC_HAVE_KUAP
- select HAVE_ARCH_VMAP_STACK
-
config PPC_85xx
bool "Freescale 85xx"
select E500
@@ -490,13 +483,12 @@ endmenu
config VDSO32
def_bool y
- depends on PPC32 || CPU_BIG_ENDIAN
+ depends on PPC32 || COMPAT
help
This symbol controls whether we build the 32-bit VDSO. We obviously
want to do that if we're building a 32-bit kernel. If we're building
- a 64-bit kernel then we only want a 32-bit VDSO if we're building for
- big endian. That is because the only little endian configuration we
- support is ppc64le which is 64-bit only.
+ a 64-bit kernel then we only want a 32-bit VDSO if we're also enabling
+ COMPAT.
choice
prompt "Endianness selection"
diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
index 15437abe1f6d..b95c3380d2b5 100644
--- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
+++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
@@ -147,7 +147,8 @@ static void __noreturn mpc7448_hpc2_restart(char *cmd)
local_irq_disable();
/* Set exception prefix high - to the firmware */
- _nmask_and_or_msr(0, MSR_IP);
+ mtmsr(mfmsr() | MSR_IP);
+ isync();
for (;;) ; /* Spin until reset happens */
}
diff --git a/arch/powerpc/platforms/embedded6xx/storcenter.c b/arch/powerpc/platforms/embedded6xx/storcenter.c
index ed1914dd34bb..e346ddcef45e 100644
--- a/arch/powerpc/platforms/embedded6xx/storcenter.c
+++ b/arch/powerpc/platforms/embedded6xx/storcenter.c
@@ -101,7 +101,8 @@ static void __noreturn storcenter_restart(char *cmd)
local_irq_disable();
/* Set exception prefix high - to the firmware */
- _nmask_and_or_msr(0, MSR_IP);
+ mtmsr(mfmsr() | MSR_IP);
+ isync();
/* Wait for reset to happen */
for (;;) ;
diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h
index 16a52afdb76e..0d715db434dc 100644
--- a/arch/powerpc/platforms/powermac/pmac.h
+++ b/arch/powerpc/platforms/powermac/pmac.h
@@ -34,7 +34,7 @@ extern void pmac_check_ht_link(void);
extern void pmac_setup_smp(void);
extern int psurge_secondary_virq;
-extern void low_cpu_die(void) __attribute__((noreturn));
+extern void low_cpu_offline_self(void) __attribute__((noreturn));
extern int pmac_nvram_init(void);
extern void pmac_pic_init(void);
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index f002b0fa69b8..2e2cc0c75d87 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -284,7 +284,7 @@ static void __init pmac_setup_arch(void)
/* 604, G3, G4 etc. */
loops_per_jiffy = *fp / HZ;
else
- /* 601, 603, etc. */
+ /* 603, etc. */
loops_per_jiffy = *fp / (2 * HZ);
of_node_put(cpu);
break;
diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S
index f9a680fdd9c4..7e0f8ba6e54a 100644
--- a/arch/powerpc/platforms/powermac/sleep.S
+++ b/arch/powerpc/platforms/powermac/sleep.S
@@ -201,8 +201,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
addi r3,r3,sleep_storage@l
stw r5,0(r3)
- .globl low_cpu_die
-low_cpu_die:
+ .globl low_cpu_offline_self
+low_cpu_offline_self:
/* Flush & disable all caches */
bl flush_disable_caches
@@ -244,7 +244,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
mtmsr r2
isync
b 1b
-_ASM_NOKPROBE_SYMBOL(low_cpu_die)
+_ASM_NOKPROBE_SYMBOL(low_cpu_offline_self)
/*
* Here is the resume code.
*/
@@ -294,14 +294,7 @@ grackle_wake_up:
* we do any r1 memory access as we are not sure they
* are in a sane state above the first 256Mb region
*/
- li r0,16 /* load up segment register values */
- mtctr r0 /* for context 0 */
- lis r3,0x2000 /* Ku = 1, VSID = 0 */
- li r4,0
-3: mtsrin r3,r4
- addi r3,r3,0x111 /* increment VSID */
- addis r4,r4,0x1000 /* address of next segment */
- bdnz 3b
+ bl load_segment_registers
sync
isync
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index eb23264910e1..74ebe664b016 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -270,10 +270,6 @@ static void __init smp_psurge_probe(void)
int i, ncpus;
struct device_node *dn;
- /* We don't do SMP on the PPC601 -- paulus */
- if (PVR_VER(mfspr(SPRN_PVR)) == 1)
- return;
-
/*
* The powersurge cpu board can be used in the generation
* of powermacs that have a socket for an upgradeable cpu card,
@@ -920,7 +916,7 @@ static int smp_core99_cpu_disable(void)
#ifdef CONFIG_PPC32
-static void pmac_cpu_die(void)
+static void pmac_cpu_offline_self(void)
{
int cpu = smp_processor_id();
@@ -930,12 +926,12 @@ static void pmac_cpu_die(void)
generic_set_cpu_dead(cpu);
smp_wmb();
mb();
- low_cpu_die();
+ low_cpu_offline_self();
}
#else /* CONFIG_PPC32 */
-static void pmac_cpu_die(void)
+static void pmac_cpu_offline_self(void)
{
int cpu = smp_processor_id();
@@ -1020,7 +1016,7 @@ void __init pmac_setup_smp(void)
#endif /* CONFIG_PPC_PMAC32_PSURGE */
#ifdef CONFIG_HOTPLUG_CPU
- ppc_md.cpu_die = pmac_cpu_die;
+ smp_ops->cpu_offline_self = pmac_cpu_offline_self;
#endif
}
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 9af8c3b98853..89e22c460ebf 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -38,60 +38,12 @@
static int eeh_event_irq = -EINVAL;
-void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
+static void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
{
dev_dbg(&pdev->dev, "EEH: Setting up device\n");
eeh_probe_device(pdev);
}
-static int pnv_eeh_init(void)
-{
- struct pci_controller *hose;
- struct pnv_phb *phb;
- int max_diag_size = PNV_PCI_DIAG_BUF_SIZE;
-
- if (!firmware_has_feature(FW_FEATURE_OPAL)) {
- pr_warn("%s: OPAL is required !\n",
- __func__);
- return -EINVAL;
- }
-
- /* Set probe mode */
- eeh_add_flag(EEH_PROBE_MODE_DEV);
-
- /*
- * P7IOC blocks PCI config access to frozen PE, but PHB3
- * doesn't do that. So we have to selectively enable I/O
- * prior to collecting error log.
- */
- list_for_each_entry(hose, &hose_list, list_node) {
- phb = hose->private_data;
-
- if (phb->model == PNV_PHB_MODEL_P7IOC)
- eeh_add_flag(EEH_ENABLE_IO_FOR_LOG);
-
- if (phb->diag_data_size > max_diag_size)
- max_diag_size = phb->diag_data_size;
-
- /*
- * PE#0 should be regarded as valid by EEH core
- * if it's not the reserved one. Currently, we
- * have the reserved PE#255 and PE#127 for PHB3
- * and P7IOC separately. So we should regard
- * PE#0 as valid for PHB3 and P7IOC.
- */
- if (phb->ioda.reserved_pe_idx != 0)
- eeh_add_flag(EEH_VALID_PE_ZERO);
-
- break;
- }
-
- eeh_set_pe_aux_size(max_diag_size);
- ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device;
-
- return 0;
-}
-
static irqreturn_t pnv_eeh_event(int irq, void *data)
{
/*
@@ -135,7 +87,7 @@ static ssize_t pnv_eeh_ei_write(struct file *filp,
return -EINVAL;
/* Retrieve PE */
- pe = eeh_pe_get(hose, pe_no, 0);
+ pe = eeh_pe_get(hose, pe_no);
if (!pe)
return -ENODEV;
@@ -190,7 +142,7 @@ PNV_EEH_DBGFS_ENTRY(inbB, 0xE10);
#endif /* CONFIG_DEBUG_FS */
-void pnv_eeh_enable_phbs(void)
+static void pnv_eeh_enable_phbs(void)
{
struct pci_controller *hose;
struct pnv_phb *phb;
@@ -354,7 +306,7 @@ static struct eeh_pe *pnv_eeh_get_upstream_pe(struct pci_dev *pdev)
if (parent) {
struct pnv_ioda_pe *ioda_pe = pnv_ioda_get_pe(parent);
- return eeh_pe_get(phb->hose, ioda_pe->pe_number, 0);
+ return eeh_pe_get(phb->hose, ioda_pe->pe_number);
}
return NULL;
@@ -1406,7 +1358,7 @@ static int pnv_eeh_get_pe(struct pci_controller *hose,
}
/* Find the PE according to PE# */
- dev_pe = eeh_pe_get(hose, pe_no, 0);
+ dev_pe = eeh_pe_get(hose, pe_no);
if (!dev_pe)
return -EEXIST;
@@ -1674,7 +1626,6 @@ static int pnv_eeh_restore_config(struct eeh_dev *edev)
static struct eeh_ops pnv_eeh_ops = {
.name = "powernv",
- .init = pnv_eeh_init,
.probe = pnv_eeh_probe,
.set_option = pnv_eeh_set_option,
.get_state = pnv_eeh_get_state,
@@ -1715,9 +1666,44 @@ DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_pci_fixup_vf_mps);
*/
static int __init eeh_powernv_init(void)
{
+ int max_diag_size = PNV_PCI_DIAG_BUF_SIZE;
+ struct pci_controller *hose;
+ struct pnv_phb *phb;
int ret = -EINVAL;
- ret = eeh_ops_register(&pnv_eeh_ops);
+ if (!firmware_has_feature(FW_FEATURE_OPAL)) {
+ pr_warn("%s: OPAL is required !\n", __func__);
+ return -EINVAL;
+ }
+
+ /* Set probe mode */
+ eeh_add_flag(EEH_PROBE_MODE_DEV);
+
+ /*
+ * P7IOC blocks PCI config access to frozen PE, but PHB3
+ * doesn't do that. So we have to selectively enable I/O
+ * prior to collecting error log.
+ */
+ list_for_each_entry(hose, &hose_list, list_node) {
+ phb = hose->private_data;
+
+ if (phb->model == PNV_PHB_MODEL_P7IOC)
+ eeh_add_flag(EEH_ENABLE_IO_FOR_LOG);
+
+ if (phb->diag_data_size > max_diag_size)
+ max_diag_size = phb->diag_data_size;
+
+ break;
+ }
+
+ /*
+ * eeh_init() allocates the eeh_pe and its aux data buf so the
+ * size needs to be set before calling eeh_init().
+ */
+ eeh_set_pe_aux_size(max_diag_size);
+ ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device;
+
+ ret = eeh_init(&pnv_eeh_ops);
if (!ret)
pr_info("EEH: PowerNV platform initialized\n");
else
@@ -1725,4 +1711,4 @@ static int __init eeh_powernv_init(void)
return ret;
}
-machine_early_initcall(powernv, eeh_powernv_init);
+machine_arch_initcall(powernv, eeh_powernv_init);
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 345ab062b21a..1ed7c5286487 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -565,7 +565,7 @@ void power7_idle_type(unsigned long type)
irq_set_pending_from_srr1(srr1);
}
-void power7_idle(void)
+static void power7_idle(void)
{
if (!powersave_nap)
return;
@@ -659,20 +659,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
mmcr0 = mfspr(SPRN_MMCR0);
}
- if (cpu_has_feature(CPU_FTR_ARCH_31)) {
- /*
- * POWER10 uses MMCRA (BHRBRD) as BHRB disable bit.
- * If the user hasn't asked for the BHRB to be
- * written, the value of MMCRA[BHRBRD] is 1.
- * On wakeup from stop, MMCRA[BHRBD] will be 0,
- * since it is previleged resource and will be lost.
- * Thus, if we do not save and restore the MMCRA[BHRBD],
- * hardware will be needlessly writing to the BHRB
- * in problem mode.
- */
- mmcra = mfspr(SPRN_MMCRA);
- }
-
if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
sprs.lpcr = mfspr(SPRN_LPCR);
sprs.hfscr = mfspr(SPRN_HFSCR);
@@ -735,10 +721,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
mtspr(SPRN_MMCR0, mmcr0);
}
- /* Reload MMCRA to restore BHRB disable bit for POWER10 */
- if (cpu_has_feature(CPU_FTR_ARCH_31))
- mtspr(SPRN_MMCRA, mmcra);
-
/*
* DD2.2 and earlier need to set then clear bit 60 in MMCRA
* to ensure the PMU starts running.
@@ -823,73 +805,6 @@ out:
return srr1;
}
-#ifdef CONFIG_HOTPLUG_CPU
-static unsigned long power9_offline_stop(unsigned long psscr)
-{
- unsigned long srr1;
-
-#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- __ppc64_runlatch_off();
- srr1 = power9_idle_stop(psscr, true);
- __ppc64_runlatch_on();
-#else
- /*
- * Tell KVM we're entering idle.
- * This does not have to be done in real mode because the P9 MMU
- * is independent per-thread. Some steppings share radix/hash mode
- * between threads, but in that case KVM has a barrier sync in real
- * mode before and after switching between radix and hash.
- *
- * kvm_start_guest must still be called in real mode though, hence
- * the false argument.
- */
- local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
-
- __ppc64_runlatch_off();
- srr1 = power9_idle_stop(psscr, false);
- __ppc64_runlatch_on();
-
- local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
- /* Order setting hwthread_state vs. testing hwthread_req */
- smp_mb();
- if (local_paca->kvm_hstate.hwthread_req)
- srr1 = idle_kvm_start_guest(srr1);
- mtmsr(MSR_KERNEL);
-#endif
-
- return srr1;
-}
-#endif
-
-void power9_idle_type(unsigned long stop_psscr_val,
- unsigned long stop_psscr_mask)
-{
- unsigned long psscr;
- unsigned long srr1;
-
- if (!prep_irq_for_idle_irqsoff())
- return;
-
- psscr = mfspr(SPRN_PSSCR);
- psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
-
- __ppc64_runlatch_off();
- srr1 = power9_idle_stop(psscr, true);
- __ppc64_runlatch_on();
-
- fini_irq_for_idle_irqsoff();
-
- irq_set_pending_from_srr1(srr1);
-}
-
-/*
- * Used for ppc_md.power_save which needs a function with no parameters
- */
-void power9_idle(void)
-{
- power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
-}
-
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
* This is used in working around bugs in thread reconfiguration
@@ -962,6 +877,198 @@ void pnv_power9_force_smt4_release(void)
EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+struct p10_sprs {
+ /*
+ * SPRs that get lost in shallow states:
+ *
+ * P10 loses CR, LR, CTR, FPSCR, VSCR, XER, TAR, SPRG2, and HSPRG1
+ * isa300 idle routines restore CR, LR.
+ * CTR is volatile
+ * idle thread doesn't use FP or VEC
+ * kernel doesn't use TAR
+ * HSPRG1 is only live in HV interrupt entry
+ * SPRG2 is only live in KVM guests, KVM handles it.
+ */
+};
+
+static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on)
+{
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+ unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
+ unsigned long srr1;
+ unsigned long pls;
+// struct p10_sprs sprs = {}; /* avoid false used-uninitialised */
+ bool sprs_saved = false;
+
+ if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
+ /* EC=ESL=0 case */
+
+ BUG_ON(!mmu_on);
+
+ /*
+ * Wake synchronously. SRESET via xscom may still cause
+ * a 0x100 powersave wakeup with SRR1 reason!
+ */
+ srr1 = isa300_idle_stop_noloss(psscr); /* go idle */
+ if (likely(!srr1))
+ return 0;
+
+ /*
+ * Registers not saved, can't recover!
+ * This would be a hardware bug
+ */
+ BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);
+
+ goto out;
+ }
+
+ /* EC=ESL=1 case */
+ if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
+ /* XXX: save SPRs for deep state loss here. */
+
+ sprs_saved = true;
+
+ atomic_start_thread_idle();
+ }
+
+ srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */
+
+ psscr = mfspr(SPRN_PSSCR);
+
+ WARN_ON_ONCE(!srr1);
+ WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
+
+ if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
+ hmi_exception_realmode(NULL);
+
+ /*
+ * On POWER10, SRR1 bits do not match exactly as expected.
+ * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
+ * just always test PSSCR for SPR/TB state loss.
+ */
+ pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
+ if (likely(pls < deep_spr_loss_state)) {
+ if (sprs_saved)
+ atomic_stop_thread_idle();
+ goto out;
+ }
+
+ /* HV state loss */
+ BUG_ON(!sprs_saved);
+
+ atomic_lock_thread_idle();
+
+ if ((*state & core_thread_mask) != 0)
+ goto core_woken;
+
+ /* XXX: restore per-core SPRs here */
+
+ if (pls >= pnv_first_tb_loss_level) {
+ /* TB loss */
+ if (opal_resync_timebase() != OPAL_SUCCESS)
+ BUG();
+ }
+
+ /*
+ * isync after restoring shared SPRs and before unlocking. Unlock
+ * only contains hwsync which does not necessarily do the right
+ * thing for SPRs.
+ */
+ isync();
+
+core_woken:
+ atomic_unlock_and_stop_thread_idle();
+
+ /* XXX: restore per-thread SPRs here */
+
+ if (!radix_enabled())
+ __slb_restore_bolted_realmode();
+
+out:
+ if (mmu_on)
+ mtmsr(MSR_KERNEL);
+
+ return srr1;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static unsigned long arch300_offline_stop(unsigned long psscr)
+{
+ unsigned long srr1;
+
+#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ __ppc64_runlatch_off();
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ srr1 = power10_idle_stop(psscr, true);
+ else
+ srr1 = power9_idle_stop(psscr, true);
+ __ppc64_runlatch_on();
+#else
+ /*
+ * Tell KVM we're entering idle.
+ * This does not have to be done in real mode because the P9 MMU
+ * is independent per-thread. Some steppings share radix/hash mode
+ * between threads, but in that case KVM has a barrier sync in real
+ * mode before and after switching between radix and hash.
+ *
+ * kvm_start_guest must still be called in real mode though, hence
+ * the false argument.
+ */
+ local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
+
+ __ppc64_runlatch_off();
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ srr1 = power10_idle_stop(psscr, false);
+ else
+ srr1 = power9_idle_stop(psscr, false);
+ __ppc64_runlatch_on();
+
+ local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
+ /* Order setting hwthread_state vs. testing hwthread_req */
+ smp_mb();
+ if (local_paca->kvm_hstate.hwthread_req)
+ srr1 = idle_kvm_start_guest(srr1);
+ mtmsr(MSR_KERNEL);
+#endif
+
+ return srr1;
+}
+#endif
+
+void arch300_idle_type(unsigned long stop_psscr_val,
+ unsigned long stop_psscr_mask)
+{
+ unsigned long psscr;
+ unsigned long srr1;
+
+ if (!prep_irq_for_idle_irqsoff())
+ return;
+
+ psscr = mfspr(SPRN_PSSCR);
+ psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
+
+ __ppc64_runlatch_off();
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ srr1 = power10_idle_stop(psscr, true);
+ else
+ srr1 = power9_idle_stop(psscr, true);
+ __ppc64_runlatch_on();
+
+ fini_irq_for_idle_irqsoff();
+
+ irq_set_pending_from_srr1(srr1);
+}
+
+/*
+ * Used for ppc_md.power_save which needs a function with no parameters
+ */
+static void arch300_idle(void)
+{
+ arch300_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
+}
+
#ifdef CONFIG_HOTPLUG_CPU
void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
@@ -995,7 +1102,7 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
psscr = mfspr(SPRN_PSSCR);
psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
pnv_deepest_stop_psscr_val;
- srr1 = power9_offline_stop(psscr);
+ srr1 = arch300_offline_stop(psscr);
} else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) {
srr1 = power7_offline();
} else {
@@ -1093,11 +1200,15 @@ int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
* @dt_idle_states: Number of idle state entries
* Returns 0 on success
*/
-static void __init pnv_power9_idle_init(void)
+static void __init pnv_arch300_idle_init(void)
{
u64 max_residency_ns = 0;
int i;
+ /* stop is not really architected, we only have p9,p10 drivers */
+ if (!pvr_version_is(PVR_POWER10) && !pvr_version_is(PVR_POWER9))
+ return;
+
/*
* pnv_deepest_stop_{val,mask} should be set to values corresponding to
* the deepest stop state.
@@ -1112,6 +1223,11 @@ static void __init pnv_power9_idle_init(void)
struct pnv_idle_states_t *state = &pnv_idle_states[i];
u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK;
+ /* No deep loss driver implemented for POWER10 yet */
+ if (pvr_version_is(PVR_POWER10) &&
+ state->flags & (OPAL_PM_TIMEBASE_STOP|OPAL_PM_LOSE_FULL_CONTEXT))
+ continue;
+
if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
(pnv_first_tb_loss_level > psscr_rl))
pnv_first_tb_loss_level = psscr_rl;
@@ -1162,7 +1278,7 @@ static void __init pnv_power9_idle_init(void)
if (unlikely(!default_stop_found)) {
pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n");
} else {
- ppc_md.power_save = power9_idle;
+ ppc_md.power_save = arch300_idle;
pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n",
pnv_default_stop_val, pnv_default_stop_mask);
}
@@ -1224,7 +1340,7 @@ static void __init pnv_probe_idle_states(void)
}
if (cpu_has_feature(CPU_FTR_ARCH_300))
- pnv_power9_idle_init();
+ pnv_arch300_idle_init();
for (i = 0; i < nr_pnv_idle_states; i++)
supported_cpuidle_states |= pnv_idle_states[i].flags;
@@ -1295,7 +1411,7 @@ static int pnv_parse_cpuidle_dt(void)
for (i = 0; i < nr_idle_states; i++)
pnv_idle_states[i].residency_ns = temp_u32[i];
- /* For power9 */
+ /* For power9 and later */
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
/* Read pm_crtl_val */
if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr",
@@ -1358,8 +1474,8 @@ static int __init pnv_init_idle_states(void)
if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
/* P7/P8 nap */
p->thread_idle_state = PNV_THREAD_RUNNING;
- } else {
- /* P9 stop */
+ } else if (pvr_version_is(PVR_POWER9)) {
+ /* P9 stop workarounds */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
p->requested_psscr = 0;
atomic_set(&p->dont_stop, 0);
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index 13b369d2cc45..6828108486f8 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -224,7 +224,7 @@ static int memtrace_online(void)
ent->mem = 0;
}
- if (add_memory(ent->nid, ent->start, ent->size)) {
+ if (add_memory(ent->nid, ent->start, ent->size, MHP_NONE)) {
pr_err("Failed to add trace memory to node %d\n",
ent->nid);
ret += 1;
diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c
index 8c65aacda9c8..ecdad219d704 100644
--- a/arch/powerpc/platforms/powernv/ocxl.c
+++ b/arch/powerpc/platforms/powernv/ocxl.c
@@ -2,7 +2,6 @@
// Copyright 2017 IBM Corp.
#include <asm/pnv-ocxl.h>
#include <asm/opal.h>
-#include <asm/xive.h>
#include <misc/ocxl-config.h>
#include "pci.h"
@@ -484,32 +483,3 @@ int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle)
return rc;
}
EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache);
-
-int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr)
-{
- __be64 flags, trigger_page;
- s64 rc;
- u32 hwirq;
-
- hwirq = xive_native_alloc_irq();
- if (!hwirq)
- return -ENOENT;
-
- rc = opal_xive_get_irq_info(hwirq, &flags, NULL, &trigger_page, NULL,
- NULL);
- if (rc || !trigger_page) {
- xive_native_free_irq(hwirq);
- return -ENOENT;
- }
- *irq = hwirq;
- *trigger_addr = be64_to_cpu(trigger_page);
- return 0;
-
-}
-EXPORT_SYMBOL_GPL(pnv_ocxl_alloc_xive_irq);
-
-void pnv_ocxl_free_xive_irq(u32 irq)
-{
- xive_native_free_irq(irq);
-}
-EXPORT_SYMBOL_GPL(pnv_ocxl_free_xive_irq);
diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c
index 6dba3b62269f..23571f0b555a 100644
--- a/arch/powerpc/platforms/powernv/opal-core.c
+++ b/arch/powerpc/platforms/powernv/opal-core.c
@@ -510,7 +510,7 @@ static void __init opalcore_config_init(void)
idx = be32_to_cpu(opalc_metadata->region_cnt);
if (idx > MAX_PT_LOAD_CNT) {
pr_warn("WARNING: OPAL regions count (%d) adjusted to limit (%d)",
- MAX_PT_LOAD_CNT, idx);
+ idx, MAX_PT_LOAD_CNT);
idx = MAX_PT_LOAD_CNT;
}
for (i = 0; i < idx; i++) {
diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c
index 62ef7ad995da..5e33b1fc67c2 100644
--- a/arch/powerpc/platforms/powernv/opal-elog.c
+++ b/arch/powerpc/platforms/powernv/opal-elog.c
@@ -179,14 +179,14 @@ static ssize_t raw_attr_read(struct file *filep, struct kobject *kobj,
return count;
}
-static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type)
+static void create_elog_obj(uint64_t id, size_t size, uint64_t type)
{
struct elog_obj *elog;
int rc;
elog = kzalloc(sizeof(*elog), GFP_KERNEL);
if (!elog)
- return NULL;
+ return;
elog->kobj.kset = elog_kset;
@@ -219,18 +219,37 @@ static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type)
rc = kobject_add(&elog->kobj, NULL, "0x%llx", id);
if (rc) {
kobject_put(&elog->kobj);
- return NULL;
+ return;
}
+ /*
+ * As soon as the sysfs file for this elog is created/activated there is
+ * a chance the opal_errd daemon (or any userspace) might read and
+ * acknowledge the elog before kobject_uevent() is called. If that
+ * happens then there is a potential race between
+ * elog_ack_store->kobject_put() and kobject_uevent() which leads to a
+ * use-after-free of a kernfs object resulting in a kernel crash.
+ *
+ * To avoid that, we need to take a reference on behalf of the bin file,
+ * so that our reference remains valid while we call kobject_uevent().
+ * We then drop our reference before exiting the function, leaving the
+ * bin file to drop the last reference (if it hasn't already).
+ */
+
+ /* Take a reference for the bin file */
+ kobject_get(&elog->kobj);
rc = sysfs_create_bin_file(&elog->kobj, &elog->raw_attr);
- if (rc) {
+ if (rc == 0) {
+ kobject_uevent(&elog->kobj, KOBJ_ADD);
+ } else {
+ /* Drop the reference taken for the bin file */
kobject_put(&elog->kobj);
- return NULL;
}
- kobject_uevent(&elog->kobj, KOBJ_ADD);
+ /* Drop our reference */
+ kobject_put(&elog->kobj);
- return elog;
+ return;
}
static irqreturn_t elog_event(int irq, void *data)
diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c
index d26da19a611f..d3b6e135c18b 100644
--- a/arch/powerpc/platforms/powernv/opal-msglog.c
+++ b/arch/powerpc/platforms/powernv/opal-msglog.c
@@ -12,6 +12,8 @@
#include <linux/types.h>
#include <asm/barrier.h>
+#include "powernv.h"
+
/* OPAL in-memory console. Defined in OPAL source at core/console.c */
struct memcons {
__be64 magic;
diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c
index 45f4223a790f..deddaebf8c14 100644
--- a/arch/powerpc/platforms/powernv/opal-prd.c
+++ b/arch/powerpc/platforms/powernv/opal-prd.c
@@ -24,7 +24,7 @@
#include <linux/uaccess.h>
-/**
+/*
* The msg member must be at the end of the struct, as it's followed by the
* message data.
*/
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 023a4f987bb2..2b4ceb5e6ce4 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -894,7 +894,6 @@ int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
{
- struct pci_dev *parent;
uint8_t bcomp, dcomp, fcomp;
long rc, rid_end, rid;
@@ -904,7 +903,6 @@ int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
- parent = pe->pbus->self;
if (pe->flags & PNV_IODA_PE_BUS_ALL)
count = resource_size(&pe->pbus->busn_res);
else
@@ -925,12 +923,6 @@ int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
}
rid_end = pe->rid + (count << 8);
} else {
-#ifdef CONFIG_PCI_IOV
- if (pe->flags & PNV_IODA_PE_VF)
- parent = pe->parent_dev;
- else
-#endif /* CONFIG_PCI_IOV */
- parent = pe->pdev->bus->self;
bcomp = OpalPciBusAll;
dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index 1aa51c4fa904..11df4e16a1cc 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -2,6 +2,13 @@
#ifndef _POWERNV_H
#define _POWERNV_H
+/*
+ * There's various hacks scattered throughout the generic powerpc arch code
+ * that needs to call into powernv platform stuff. The prototypes for those
+ * functions are in asm/powernv.h
+ */
+#include <asm/powernv.h>
+
#ifdef CONFIG_SMP
extern void pnv_smp_init(void);
#else
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 8035caf6e297..72c25295c1c2 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -65,7 +65,7 @@ int powernv_get_random_real_mode(unsigned long *v)
return 1;
}
-int powernv_get_random_darn(unsigned long *v)
+static int powernv_get_random_darn(unsigned long *v)
{
unsigned long val;
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 7fcb88623081..9acaa0f131b9 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -130,6 +130,28 @@ static void pnv_setup_rfi_flush(void)
setup_count_cache_flush();
}
+static void __init pnv_check_guarded_cores(void)
+{
+ struct device_node *dn;
+ int bad_count = 0;
+
+ for_each_node_by_type(dn, "cpu") {
+ if (of_property_match_string(dn, "status", "bad") >= 0)
+ bad_count++;
+ };
+
+ if (bad_count) {
+ printk(" _ _______________\n");
+ pr_cont(" | | / \\\n");
+ pr_cont(" | | | WARNING! |\n");
+ pr_cont(" | | | |\n");
+ pr_cont(" | | | It looks like |\n");
+ pr_cont(" |_| | you have %*d |\n", 3, bad_count);
+ pr_cont(" _ | guarded cores |\n");
+ pr_cont(" (_) \\_______________/\n");
+ }
+}
+
static void __init pnv_setup_arch(void)
{
set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
@@ -150,6 +172,8 @@ static void __init pnv_setup_arch(void)
/* Enable NAP mode */
powersave_nap = 1;
+ pnv_check_guarded_cores();
+
/* XXX PMCS */
}
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index b2ba3e95bda7..54c4ba45c7ce 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -43,7 +43,7 @@
#include <asm/udbg.h>
#define DBG(fmt...) udbg_printf(fmt)
#else
-#define DBG(fmt...)
+#define DBG(fmt...) do { } while (0)
#endif
static void pnv_smp_setup_cpu(int cpu)
@@ -158,7 +158,7 @@ static void pnv_flush_interrupts(void)
}
}
-static void pnv_smp_cpu_kill_self(void)
+static void pnv_cpu_offline_self(void)
{
unsigned long srr1, unexpected_mask, wmask;
unsigned int cpu;
@@ -417,6 +417,7 @@ static struct smp_ops_t pnv_smp_ops = {
#ifdef CONFIG_HOTPLUG_CPU
.cpu_disable = pnv_smp_cpu_disable,
.cpu_die = generic_cpu_die,
+ .cpu_offline_self = pnv_cpu_offline_self,
#endif /* CONFIG_HOTPLUG_CPU */
};
@@ -430,7 +431,6 @@ void __init pnv_smp_init(void)
smp_ops = &pnv_smp_ops;
#ifdef CONFIG_HOTPLUG_CPU
- ppc_md.cpu_die = pnv_smp_cpu_kill_self;
#ifdef CONFIG_KEXEC_CORE
crash_wake_offline = 1;
#endif
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
index 6434f9cb5aed..5f5fe63a3d1c 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -186,7 +186,7 @@ static void unmap_winctx_mmio_bars(struct vas_window *window)
* OS/User Window Context (UWC) MMIO Base Address Region for the given window.
* Map these bus addresses and save the mapped kernel addresses in @window.
*/
-int map_winctx_mmio_bars(struct vas_window *window)
+static int map_winctx_mmio_bars(struct vas_window *window)
{
int len;
u64 start;
@@ -214,7 +214,7 @@ int map_winctx_mmio_bars(struct vas_window *window)
* registers are not sequential. And, we can only write to offsets
* with valid registers.
*/
-void reset_window_regs(struct vas_window *window)
+static void reset_window_regs(struct vas_window *window)
{
write_hvwc_reg(window, VREG(LPID), 0ULL);
write_hvwc_reg(window, VREG(PID), 0ULL);
@@ -357,7 +357,8 @@ static void init_rsvd_tx_buf_count(struct vas_window *txwin,
* as a one-time task? That could work for NX but what about other
* receivers? Let the receivers tell us the rx-fifo buffers for now.
*/
-int init_winctx_regs(struct vas_window *window, struct vas_winctx *winctx)
+static void init_winctx_regs(struct vas_window *window,
+ struct vas_winctx *winctx)
{
u64 val;
int fifo_size;
@@ -499,8 +500,6 @@ int init_winctx_regs(struct vas_window *window, struct vas_winctx *winctx)
val = SET_FIELD(VAS_WINCTL_NX_WIN, val, winctx->nx_win);
val = SET_FIELD(VAS_WINCTL_OPEN, val, 1);
write_hvwc_reg(window, VREG(WINCTL), val);
-
- return 0;
}
static void vas_release_window_id(struct ida *ida, int winid)
diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c
index 1193c294b8d0..0c252478e556 100644
--- a/arch/powerpc/platforms/ps3/spu.c
+++ b/arch/powerpc/platforms/ps3/spu.c
@@ -448,7 +448,7 @@ static void ps3_disable_spu(struct spu_context *ctx)
ctx->ops->runcntl_stop(ctx);
}
-const struct spu_management_ops spu_management_ps3_ops = {
+static const struct spu_management_ops spu_management_ps3_ops = {
.enumerate_spus = ps3_enumerate_spus,
.create_spu = ps3_create_spu,
.destroy_spu = ps3_destroy_spu,
@@ -589,7 +589,7 @@ static u64 resource_allocation_enable_get(struct spu *spu)
return 0; /* No support. */
}
-const struct spu_priv1_ops spu_priv1_ps3_ops = {
+static const struct spu_priv1_ops spu_priv1_ps3_ops = {
.int_mask_and = int_mask_and,
.int_mask_or = int_mask_or,
.int_mask_set = int_mask_set,
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index cb2d9a970b7b..cf024fa37bda 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -33,8 +33,6 @@
#include <asm/ppc-pci.h>
#include <asm/rtas.h>
-static int pseries_eeh_get_pe_addr(struct pci_dn *pdn);
-
/* RTAS tokens */
static int ibm_set_eeh_option;
static int ibm_set_slot_reset;
@@ -86,42 +84,43 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
/**
- * pseries_eeh_get_config_addr - Retrieve config address
+ * pseries_eeh_get_pe_config_addr - Find the pe_config_addr for a device
+ * @pdn: pci_dn of the input device
+ *
+ * The EEH RTAS calls use a tuple consisting of: (buid_hi, buid_lo,
+ * pe_config_addr) as a handle to a given PE. This function finds the
+ * pe_config_addr based on the device's config addr.
*
- * Retrieve the assocated config address. Actually, there're 2 RTAS
- * function calls dedicated for the purpose. We need implement
- * it through the new function and then the old one. Besides,
- * you should make sure the config address is figured out from
- * FDT node before calling the function.
+ * Keep in mind that the pe_config_addr *might* be numerically identical to the
+ * device's config addr, but the two are conceptually distinct.
*
- * It's notable that zero'ed return value means invalid PE config
- * address.
+ * Returns the pe_config_addr, or a negative error code.
*/
-static int pseries_eeh_get_config_addr(struct pci_controller *phb, int config_addr)
+static int pseries_eeh_get_pe_config_addr(struct pci_dn *pdn)
{
- int ret = 0;
- int rets[3];
+ int config_addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
+ struct pci_controller *phb = pdn->phb;
+ int ret, rets[3];
if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
/*
- * First of all, we need to make sure there has one PE
- * associated with the device. Otherwise, PE address is
- * meaningless.
+ * First of all, use function 1 to determine if this device is
+ * part of a PE or not. ret[0] being zero indicates it's not.
*/
ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
config_addr, BUID_HI(phb->buid),
BUID_LO(phb->buid), 1);
if (ret || (rets[0] == 0))
- return 0;
+ return -ENOENT;
- /* Retrieve the associated PE config address */
+ /* Retrieve the associated PE config address with function 0 */
ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
config_addr, BUID_HI(phb->buid),
BUID_LO(phb->buid), 0);
if (ret) {
pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
__func__, phb->global_number, config_addr);
- return 0;
+ return -ENXIO;
}
return rets[0];
@@ -134,13 +133,20 @@ static int pseries_eeh_get_config_addr(struct pci_controller *phb, int config_ad
if (ret) {
pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
__func__, phb->global_number, config_addr);
- return 0;
+ return -ENXIO;
}
return rets[0];
}
- return ret;
+ /*
+ * PAPR does describe a process for finding the pe_config_addr that was
+ * used before the ibm,get-config-addr-info calls were added. However,
+ * I haven't found *any* systems that don't have that RTAS call
+ * implemented. If you happen to find one that needs the old DT based
+ * process, patches are welcome!
+ */
+ return -ENOENT;
}
/**
@@ -161,8 +167,7 @@ static int pseries_eeh_phb_reset(struct pci_controller *phb, int config_addr, in
BUID_LO(phb->buid), option);
/* If fundamental-reset not supported, try hot-reset */
- if (option == EEH_RESET_FUNDAMENTAL &&
- ret == -8) {
+ if (option == EEH_RESET_FUNDAMENTAL && ret == -8) {
option = EEH_RESET_HOT;
ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
config_addr, BUID_HI(phb->buid),
@@ -170,8 +175,7 @@ static int pseries_eeh_phb_reset(struct pci_controller *phb, int config_addr, in
}
/* We need reset hold or settlement delay */
- if (option == EEH_RESET_FUNDAMENTAL ||
- option == EEH_RESET_HOT)
+ if (option == EEH_RESET_FUNDAMENTAL || option == EEH_RESET_HOT)
msleep(EEH_PE_RST_HOLD_TIME);
else
msleep(EEH_PE_RST_SETTLE_TIME);
@@ -239,88 +243,6 @@ static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
static DEFINE_SPINLOCK(slot_errbuf_lock);
static int eeh_error_buf_size;
-/**
- * pseries_eeh_init - EEH platform dependent initialization
- *
- * EEH platform dependent initialization on pseries.
- */
-static int pseries_eeh_init(void)
-{
- struct pci_controller *phb;
- struct pci_dn *pdn;
- int addr, config_addr;
-
- /* figure out EEH RTAS function call tokens */
- ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
- ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
- ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
- ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
- ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
- ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
- ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
- ibm_configure_pe = rtas_token("ibm,configure-pe");
-
- /*
- * ibm,configure-pe and ibm,configure-bridge have the same semantics,
- * however ibm,configure-pe can be faster. If we can't find
- * ibm,configure-pe then fall back to using ibm,configure-bridge.
- */
- if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE)
- ibm_configure_pe = rtas_token("ibm,configure-bridge");
-
- /*
- * Necessary sanity check. We needn't check "get-config-addr-info"
- * and its variant since the old firmware probably support address
- * of domain/bus/slot/function for EEH RTAS operations.
- */
- if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE ||
- ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE ||
- (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE &&
- ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) ||
- ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE ||
- ibm_configure_pe == RTAS_UNKNOWN_SERVICE) {
- pr_info("EEH functionality not supported\n");
- return -EINVAL;
- }
-
- /* Initialize error log lock and size */
- spin_lock_init(&slot_errbuf_lock);
- eeh_error_buf_size = rtas_token("rtas-error-log-max");
- if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
- pr_info("%s: unknown EEH error log size\n",
- __func__);
- eeh_error_buf_size = 1024;
- } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
- pr_info("%s: EEH error log size %d exceeds the maximal %d\n",
- __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
- eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
- }
-
- /* Set EEH probe mode */
- eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG);
-
- /* Set EEH machine dependent code */
- ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device;
-
- if (is_kdump_kernel() || reset_devices) {
- pr_info("Issue PHB reset ...\n");
- list_for_each_entry(phb, &hose_list, list_node) {
- pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list);
- addr = (pdn->busno << 16) | (pdn->devfn << 8);
- config_addr = pseries_eeh_get_config_addr(phb, addr);
- /* invalid PE config addr */
- if (config_addr == 0)
- continue;
-
- pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_FUNDAMENTAL);
- pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_DEACTIVATE);
- pseries_eeh_phb_configure_bridge(phb, config_addr);
- }
- }
-
- return 0;
-}
-
static int pseries_eeh_cap_start(struct pci_dn *pdn)
{
u32 status;
@@ -439,10 +361,9 @@ static struct eeh_pe *pseries_eeh_pe_get_parent(struct eeh_dev *edev)
*/
void pseries_eeh_init_edev(struct pci_dn *pdn)
{
+ struct eeh_pe pe, *parent;
struct eeh_dev *edev;
- struct eeh_pe pe;
u32 pcie_flags;
- int enable = 0;
int ret;
if (WARN_ON_ONCE(!eeh_has_flag(EEH_PROBE_MODE_DEVTREE)))
@@ -499,51 +420,38 @@ void pseries_eeh_init_edev(struct pci_dn *pdn)
}
}
- /* Initialize the fake PE */
+ /* first up, find the pe_config_addr for the PE containing the device */
+ ret = pseries_eeh_get_pe_config_addr(pdn);
+ if (ret < 0) {
+ eeh_edev_dbg(edev, "Unable to find pe_config_addr\n");
+ goto err;
+ }
+
+ /* Try enable EEH on the fake PE */
memset(&pe, 0, sizeof(struct eeh_pe));
pe.phb = pdn->phb;
- pe.config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
+ pe.addr = ret;
- /* Enable EEH on the device */
eeh_edev_dbg(edev, "Enabling EEH on device\n");
ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE);
if (ret) {
eeh_edev_dbg(edev, "EEH failed to enable on device (code %d)\n", ret);
- } else {
- struct eeh_pe *parent;
+ goto err;
+ }
- /* Retrieve PE address */
- edev->pe_config_addr = pseries_eeh_get_pe_addr(pdn);
- pe.addr = edev->pe_config_addr;
+ edev->pe_config_addr = pe.addr;
- /* Some older systems (Power4) allow the ibm,set-eeh-option
- * call to succeed even on nodes where EEH is not supported.
- * Verify support explicitly.
- */
- ret = eeh_ops->get_state(&pe, NULL);
- if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT)
- enable = 1;
+ eeh_add_flag(EEH_ENABLED);
- /*
- * This device doesn't support EEH, but it may have an
- * EEH parent. In this case any error on the device will
- * freeze the PE of it's upstream bridge, so added it to
- * the upstream PE.
- */
- parent = pseries_eeh_pe_get_parent(edev);
- if (parent && !enable)
- edev->pe_config_addr = parent->addr;
+ parent = pseries_eeh_pe_get_parent(edev);
+ eeh_pe_tree_insert(edev, parent);
+ eeh_save_bars(edev);
+ eeh_edev_dbg(edev, "EEH enabled for device");
- if (enable || parent) {
- eeh_add_flag(EEH_ENABLED);
- eeh_pe_tree_insert(edev, parent);
- }
- eeh_edev_dbg(edev, "EEH is %s on device (code %d)\n",
- (enable ? "enabled" : "unsupported"), ret);
- }
+ return;
- /* Save memory bars */
- eeh_save_bars(edev);
+err:
+ eeh_edev_dbg(edev, "EEH is unsupported on device (code = %d)\n", ret);
}
static struct eeh_dev *pseries_eeh_probe(struct pci_dev *pdev)
@@ -600,7 +508,6 @@ EXPORT_SYMBOL_GPL(pseries_eeh_init_edev_recursive);
static int pseries_eeh_set_option(struct eeh_pe *pe, int option)
{
int ret = 0;
- int config_addr;
/*
* When we're enabling or disabling EEH functioality on
@@ -613,85 +520,23 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option)
case EEH_OPT_ENABLE:
case EEH_OPT_THAW_MMIO:
case EEH_OPT_THAW_DMA:
- config_addr = pe->config_addr;
- if (pe->addr)
- config_addr = pe->addr;
break;
case EEH_OPT_FREEZE_PE:
/* Not support */
return 0;
default:
- pr_err("%s: Invalid option %d\n",
- __func__, option);
+ pr_err("%s: Invalid option %d\n", __func__, option);
return -EINVAL;
}
ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
- config_addr, BUID_HI(pe->phb->buid),
+ pe->addr, BUID_HI(pe->phb->buid),
BUID_LO(pe->phb->buid), option);
return ret;
}
/**
- * pseries_eeh_get_pe_addr - Retrieve PE address
- * @pe: EEH PE
- *
- * Retrieve the assocated PE address. Actually, there're 2 RTAS
- * function calls dedicated for the purpose. We need implement
- * it through the new function and then the old one. Besides,
- * you should make sure the config address is figured out from
- * FDT node before calling the function.
- *
- * It's notable that zero'ed return value means invalid PE config
- * address.
- */
-static int pseries_eeh_get_pe_addr(struct pci_dn *pdn)
-{
- int config_addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
- unsigned long buid = pdn->phb->buid;
- int ret = 0;
- int rets[3];
-
- if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
- /*
- * First of all, we need to make sure there has one PE
- * associated with the device. Otherwise, PE address is
- * meaningless.
- */
- ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
- config_addr, BUID_HI(buid), BUID_LO(buid), 1);
- if (ret || (rets[0] == 0))
- return 0;
-
- /* Retrieve the associated PE config address */
- ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
- config_addr, BUID_HI(buid), BUID_LO(buid), 0);
- if (ret) {
- pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
- __func__, pdn->phb->global_number, config_addr);
- return 0;
- }
-
- return rets[0];
- }
-
- if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
- ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
- config_addr, BUID_HI(buid), BUID_LO(buid), 0);
- if (ret) {
- pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
- __func__, pdn->phb->global_number, config_addr);
- return 0;
- }
-
- return rets[0];
- }
-
- return ret;
-}
-
-/**
* pseries_eeh_get_state - Retrieve PE state
* @pe: EEH PE
* @delay: suggested time to wait if state is unavailable
@@ -706,25 +551,19 @@ static int pseries_eeh_get_pe_addr(struct pci_dn *pdn)
*/
static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay)
{
- int config_addr;
int ret;
int rets[4];
int result;
- /* Figure out PE config address if possible */
- config_addr = pe->config_addr;
- if (pe->addr)
- config_addr = pe->addr;
-
if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
ret = rtas_call(ibm_read_slot_reset_state2, 3, 4, rets,
- config_addr, BUID_HI(pe->phb->buid),
+ pe->addr, BUID_HI(pe->phb->buid),
BUID_LO(pe->phb->buid));
} else if (ibm_read_slot_reset_state != RTAS_UNKNOWN_SERVICE) {
/* Fake PE unavailable info */
rets[2] = 0;
ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets,
- config_addr, BUID_HI(pe->phb->buid),
+ pe->addr, BUID_HI(pe->phb->buid),
BUID_LO(pe->phb->buid));
} else {
return EEH_STATE_NOT_SUPPORT;
@@ -778,14 +617,7 @@ static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay)
*/
static int pseries_eeh_reset(struct eeh_pe *pe, int option)
{
- int config_addr;
-
- /* Figure out PE address */
- config_addr = pe->config_addr;
- if (pe->addr)
- config_addr = pe->addr;
-
- return pseries_eeh_phb_reset(pe->phb, config_addr, option);
+ return pseries_eeh_phb_reset(pe->phb, pe->addr, option);
}
/**
@@ -801,19 +633,13 @@ static int pseries_eeh_reset(struct eeh_pe *pe, int option)
*/
static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len)
{
- int config_addr;
unsigned long flags;
int ret;
spin_lock_irqsave(&slot_errbuf_lock, flags);
memset(slot_errbuf, 0, eeh_error_buf_size);
- /* Figure out the PE address */
- config_addr = pe->config_addr;
- if (pe->addr)
- config_addr = pe->addr;
-
- ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, config_addr,
+ ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, pe->addr,
BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid),
virt_to_phys(drv_log), len,
virt_to_phys(slot_errbuf), eeh_error_buf_size,
@@ -832,14 +658,7 @@ static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, u
*/
static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
{
- int config_addr;
-
- /* Figure out the PE address */
- config_addr = pe->config_addr;
- if (pe->addr)
- config_addr = pe->addr;
-
- return pseries_eeh_phb_configure_bridge(pe->phb, config_addr);
+ return pseries_eeh_phb_configure_bridge(pe->phb, pe->addr);
}
/**
@@ -954,8 +773,7 @@ static int pseries_notify_resume(struct eeh_dev *edev)
if (!edev)
return -EEXIST;
- if (rtas_token("ibm,open-sriov-allow-unfreeze")
- == RTAS_UNKNOWN_SERVICE)
+ if (rtas_token("ibm,open-sriov-allow-unfreeze") == RTAS_UNKNOWN_SERVICE)
return -EINVAL;
if (edev->pdev->is_physfn || edev->pdev->is_virtfn)
@@ -967,7 +785,6 @@ static int pseries_notify_resume(struct eeh_dev *edev)
static struct eeh_ops pseries_eeh_ops = {
.name = "pseries",
- .init = pseries_eeh_init,
.probe = pseries_eeh_probe,
.set_option = pseries_eeh_set_option,
.get_state = pseries_eeh_get_state,
@@ -992,15 +809,84 @@ static struct eeh_ops pseries_eeh_ops = {
*/
static int __init eeh_pseries_init(void)
{
- int ret;
+ struct pci_controller *phb;
+ struct pci_dn *pdn;
+ int ret, config_addr;
- ret = eeh_ops_register(&pseries_eeh_ops);
+ /* figure out EEH RTAS function call tokens */
+ ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
+ ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
+ ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
+ ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
+ ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
+ ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
+ ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
+ ibm_configure_pe = rtas_token("ibm,configure-pe");
+
+ /*
+ * ibm,configure-pe and ibm,configure-bridge have the same semantics,
+ * however ibm,configure-pe can be faster. If we can't find
+ * ibm,configure-pe then fall back to using ibm,configure-bridge.
+ */
+ if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE)
+ ibm_configure_pe = rtas_token("ibm,configure-bridge");
+
+ /*
+ * Necessary sanity check. We needn't check "get-config-addr-info"
+ * and its variant since the old firmware probably support address
+ * of domain/bus/slot/function for EEH RTAS operations.
+ */
+ if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE ||
+ ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE ||
+ (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE &&
+ ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) ||
+ ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE ||
+ ibm_configure_pe == RTAS_UNKNOWN_SERVICE) {
+ pr_info("EEH functionality not supported\n");
+ return -EINVAL;
+ }
+
+ /* Initialize error log lock and size */
+ spin_lock_init(&slot_errbuf_lock);
+ eeh_error_buf_size = rtas_token("rtas-error-log-max");
+ if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
+ pr_info("%s: unknown EEH error log size\n",
+ __func__);
+ eeh_error_buf_size = 1024;
+ } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
+ pr_info("%s: EEH error log size %d exceeds the maximal %d\n",
+ __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
+ eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
+ }
+
+ /* Set EEH probe mode */
+ eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG);
+
+ /* Set EEH machine dependent code */
+ ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device;
+
+ if (is_kdump_kernel() || reset_devices) {
+ pr_info("Issue PHB reset ...\n");
+ list_for_each_entry(phb, &hose_list, list_node) {
+ pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list);
+ config_addr = pseries_eeh_get_pe_config_addr(pdn);
+
+ /* invalid PE config addr */
+ if (config_addr < 0)
+ continue;
+
+ pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_FUNDAMENTAL);
+ pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_DEACTIVATE);
+ pseries_eeh_phb_configure_bridge(phb, config_addr);
+ }
+ }
+
+ ret = eeh_init(&pseries_eeh_ops);
if (!ret)
pr_info("EEH: pSeries platform initialized\n");
else
pr_info("EEH: pSeries platform initialization failure (%d)\n",
ret);
-
return ret;
}
-machine_early_initcall(pseries, eeh_pseries_init);
+machine_arch_initcall(pseries, eeh_pseries_init);
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 7a974ed6b240..f2837e33bf5d 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -55,7 +55,7 @@ static void rtas_stop_self(void)
panic("Alas, I survived.\n");
}
-static void pseries_mach_cpu_die(void)
+static void pseries_cpu_offline_self(void)
{
unsigned int hwcpu = hard_smp_processor_id();
@@ -102,7 +102,7 @@ static int pseries_cpu_disable(void)
* to self-destroy so that the cpu-offline thread can send the CPU_DEAD
* notifications.
*
- * OTOH, pseries_mach_cpu_die() is called by the @cpu when it wants to
+ * OTOH, pseries_cpu_offline_self() is called by the @cpu when it wants to
* self-destruct.
*/
static void pseries_cpu_die(unsigned int cpu)
@@ -901,7 +901,7 @@ static int __init pseries_cpu_hotplug_init(void)
return 0;
}
- ppc_md.cpu_die = pseries_mach_cpu_die;
+ smp_ops->cpu_offline_self = pseries_cpu_offline_self;
smp_ops->cpu_disable = pseries_cpu_disable;
smp_ops->cpu_die = pseries_cpu_die;
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 5d545b78111f..7efe6ec5d14a 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -30,12 +30,17 @@ unsigned long pseries_memory_block_size(void)
np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
if (np) {
- const __be64 *size;
+ int len;
+ int size_cells;
+ const __be32 *prop;
- size = of_get_property(np, "ibm,lmb-size", NULL);
- if (size)
- memblock_size = be64_to_cpup(size);
+ size_cells = of_n_size_cells(np);
+
+ prop = of_get_property(np, "ibm,lmb-size", &len);
+ if (prop && len >= size_cells * sizeof(__be32))
+ memblock_size = of_read_number(prop, size_cells);
of_node_put(np);
+
} else if (machine_is(pseries)) {
/* This fallback really only applies to pseries */
unsigned int memzero_size = 0;
@@ -277,7 +282,7 @@ static int dlpar_offline_lmb(struct drmem_lmb *lmb)
return dlpar_change_lmb_state(lmb, false);
}
-static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
+static int pseries_remove_memblock(unsigned long base, unsigned long memblock_size)
{
unsigned long block_sz, start_pfn;
int sections_per_block;
@@ -308,10 +313,11 @@ out:
static int pseries_remove_mem_node(struct device_node *np)
{
- const __be32 *regs;
+ const __be32 *prop;
unsigned long base;
- unsigned int lmb_size;
+ unsigned long lmb_size;
int ret = -EINVAL;
+ int addr_cells, size_cells;
/*
* Check to see if we are actually removing memory
@@ -322,12 +328,19 @@ static int pseries_remove_mem_node(struct device_node *np)
/*
* Find the base address and size of the memblock
*/
- regs = of_get_property(np, "reg", NULL);
- if (!regs)
+ prop = of_get_property(np, "reg", NULL);
+ if (!prop)
return ret;
- base = be64_to_cpu(*(unsigned long *)regs);
- lmb_size = be32_to_cpu(regs[3]);
+ addr_cells = of_n_addr_cells(np);
+ size_cells = of_n_size_cells(np);
+
+ /*
+ * "reg" property represents (addr,size) tuple.
+ */
+ base = of_read_number(prop, addr_cells);
+ prop += addr_cells;
+ lmb_size = of_read_number(prop, size_cells);
pseries_remove_memblock(base, lmb_size);
return 0;
@@ -354,25 +367,32 @@ static int dlpar_add_lmb(struct drmem_lmb *);
static int dlpar_remove_lmb(struct drmem_lmb *lmb)
{
+ struct memory_block *mem_block;
unsigned long block_sz;
int rc;
if (!lmb_is_removable(lmb))
return -EINVAL;
+ mem_block = lmb_to_memblock(lmb);
+ if (mem_block == NULL)
+ return -EINVAL;
+
rc = dlpar_offline_lmb(lmb);
- if (rc)
+ if (rc) {
+ put_device(&mem_block->dev);
return rc;
+ }
block_sz = pseries_memory_block_size();
- __remove_memory(lmb->nid, lmb->base_addr, block_sz);
+ __remove_memory(mem_block->nid, lmb->base_addr, block_sz);
+ put_device(&mem_block->dev);
/* Update memory regions for memory remove */
memblock_remove(lmb->base_addr, block_sz);
invalidate_lmb_associativity_index(lmb);
- lmb_clear_nid(lmb);
lmb->flags &= ~DRCONF_MEM_ASSIGNED;
return 0;
@@ -557,7 +577,7 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
#else
static inline int pseries_remove_memblock(unsigned long base,
- unsigned int memblock_size)
+ unsigned long memblock_size)
{
return -EOPNOTSUPP;
}
@@ -591,7 +611,7 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
static int dlpar_add_lmb(struct drmem_lmb *lmb)
{
unsigned long block_sz;
- int rc;
+ int nid, rc;
if (lmb->flags & DRCONF_MEM_ASSIGNED)
return -EINVAL;
@@ -602,11 +622,15 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
return rc;
}
- lmb_set_nid(lmb);
block_sz = memory_block_size_bytes();
+ /* Find the node id for this LMB. Fake one if necessary. */
+ nid = of_drconf_to_nid_single(lmb);
+ if (nid < 0 || !node_possible(nid))
+ nid = first_online_node;
+
/* Add the memory */
- rc = __add_memory(lmb->nid, lmb->base_addr, block_sz);
+ rc = __add_memory(nid, lmb->base_addr, block_sz, MHP_NONE);
if (rc) {
invalidate_lmb_associativity_index(lmb);
return rc;
@@ -614,9 +638,8 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
rc = dlpar_online_lmb(lmb);
if (rc) {
- __remove_memory(lmb->nid, lmb->base_addr, block_sz);
+ __remove_memory(nid, lmb->base_addr, block_sz);
invalidate_lmb_associativity_index(lmb);
- lmb_clear_nid(lmb);
} else {
lmb->flags |= DRCONF_MEM_ASSIGNED;
}
@@ -878,10 +901,11 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
static int pseries_add_mem_node(struct device_node *np)
{
- const __be32 *regs;
+ const __be32 *prop;
unsigned long base;
- unsigned int lmb_size;
+ unsigned long lmb_size;
int ret = -EINVAL;
+ int addr_cells, size_cells;
/*
* Check to see if we are actually adding memory
@@ -892,12 +916,18 @@ static int pseries_add_mem_node(struct device_node *np)
/*
* Find the base and size of the memblock
*/
- regs = of_get_property(np, "reg", NULL);
- if (!regs)
+ prop = of_get_property(np, "reg", NULL);
+ if (!prop)
return ret;
- base = be64_to_cpu(*(unsigned long *)regs);
- lmb_size = be32_to_cpu(regs[3]);
+ addr_cells = of_n_addr_cells(np);
+ size_cells = of_n_size_cells(np);
+ /*
+ * "reg" property represents (addr,size) tuple.
+ */
+ base = of_read_number(prop, addr_cells);
+ prop += addr_cells;
+ lmb_size = of_read_number(prop, size_cells);
/*
* Update memory region to represent the memory add
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
index c40c62ec432e..2c59b4986ea5 100644
--- a/arch/powerpc/platforms/pseries/hvCall_inst.c
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -70,31 +70,14 @@ static int hc_show(struct seq_file *m, void *p)
return 0;
}
-static const struct seq_operations hcall_inst_seq_ops = {
+static const struct seq_operations hcall_inst_sops = {
.start = hc_start,
.next = hc_next,
.stop = hc_stop,
.show = hc_show
};
-static int hcall_inst_seq_open(struct inode *inode, struct file *file)
-{
- int rc;
- struct seq_file *seq;
-
- rc = seq_open(file, &hcall_inst_seq_ops);
- seq = file->private_data;
- seq->private = file_inode(file)->i_private;
-
- return rc;
-}
-
-static const struct file_operations hcall_inst_seq_fops = {
- .open = hcall_inst_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
+DEFINE_SEQ_ATTRIBUTE(hcall_inst);
#define HCALL_ROOT_DIR "hcall_inst"
#define CPU_NAME_BUF_SIZE 32
@@ -149,7 +132,7 @@ static int __init hcall_inst_init(void)
snprintf(cpu_name_buf, CPU_NAME_BUF_SIZE, "cpu%d", cpu);
debugfs_create_file(cpu_name_buf, 0444, hcall_root,
per_cpu(hcall_stats, cpu),
- &hcall_inst_seq_fops);
+ &hcall_inst_fops);
}
return 0;
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 6d47b4a3ce39..e4198700ed1a 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -39,6 +39,20 @@
#include "pseries.h"
+enum {
+ DDW_QUERY_PE_DMA_WIN = 0,
+ DDW_CREATE_PE_DMA_WIN = 1,
+ DDW_REMOVE_PE_DMA_WIN = 2,
+
+ DDW_APPLICABLE_SIZE
+};
+
+enum {
+ DDW_EXT_SIZE = 0,
+ DDW_EXT_RESET_DMA_WIN = 1,
+ DDW_EXT_QUERY_OUT_SIZE = 2
+};
+
static struct iommu_table_group *iommu_pseries_alloc_group(int node)
{
struct iommu_table_group *table_group;
@@ -334,7 +348,7 @@ struct direct_window {
/* Dynamic DMA Window support */
struct ddw_query_response {
u32 windows_available;
- u32 largest_available_block;
+ u64 largest_available_block;
u32 page_size;
u32 migration_capable;
};
@@ -767,25 +781,14 @@ static int __init disable_ddw_setup(char *str)
early_param("disable_ddw", disable_ddw_setup);
-static void remove_ddw(struct device_node *np, bool remove_prop)
+static void remove_dma_window(struct device_node *np, u32 *ddw_avail,
+ struct property *win)
{
struct dynamic_dma_window_prop *dwp;
- struct property *win64;
- u32 ddw_avail[3];
u64 liobn;
- int ret = 0;
-
- ret = of_property_read_u32_array(np, "ibm,ddw-applicable",
- &ddw_avail[0], 3);
-
- win64 = of_find_property(np, DIRECT64_PROPNAME, NULL);
- if (!win64)
- return;
-
- if (ret || win64->length < sizeof(*dwp))
- goto delprop;
+ int ret;
- dwp = win64->value;
+ dwp = win->value;
liobn = (u64)be32_to_cpu(dwp->liobn);
/* clear the whole window, note the arg is in kernel pages */
@@ -798,19 +801,39 @@ static void remove_ddw(struct device_node *np, bool remove_prop)
pr_debug("%pOF successfully cleared tces in window.\n",
np);
- ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn);
+ ret = rtas_call(ddw_avail[DDW_REMOVE_PE_DMA_WIN], 1, 1, NULL, liobn);
if (ret)
pr_warn("%pOF: failed to remove direct window: rtas returned "
"%d to ibm,remove-pe-dma-window(%x) %llx\n",
- np, ret, ddw_avail[2], liobn);
+ np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
else
pr_debug("%pOF: successfully removed direct window: rtas returned "
"%d to ibm,remove-pe-dma-window(%x) %llx\n",
- np, ret, ddw_avail[2], liobn);
+ np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
+}
+
+static void remove_ddw(struct device_node *np, bool remove_prop)
+{
+ struct property *win;
+ u32 ddw_avail[DDW_APPLICABLE_SIZE];
+ int ret = 0;
-delprop:
- if (remove_prop)
- ret = of_remove_property(np, win64);
+ ret = of_property_read_u32_array(np, "ibm,ddw-applicable",
+ &ddw_avail[0], DDW_APPLICABLE_SIZE);
+ if (ret)
+ return;
+
+ win = of_find_property(np, DIRECT64_PROPNAME, NULL);
+ if (!win)
+ return;
+
+ if (win->length >= sizeof(struct dynamic_dma_window_prop))
+ remove_dma_window(np, ddw_avail, win);
+
+ if (!remove_prop)
+ return;
+
+ ret = of_remove_property(np, win);
if (ret)
pr_warn("%pOF: failed to remove direct window property: %d\n",
np, ret);
@@ -869,14 +892,62 @@ static int find_existing_ddw_windows(void)
}
machine_arch_initcall(pseries, find_existing_ddw_windows);
+/**
+ * ddw_read_ext - Get the value of an DDW extension
+ * @np: device node from which the extension value is to be read.
+ * @extnum: index number of the extension.
+ * @value: pointer to return value, modified when extension is available.
+ *
+ * Checks if "ibm,ddw-extensions" exists for this node, and get the value
+ * on index 'extnum'.
+ * It can be used only to check if a property exists, passing value == NULL.
+ *
+ * Returns:
+ * 0 if extension successfully read
+ * -EINVAL if the "ibm,ddw-extensions" does not exist,
+ * -ENODATA if "ibm,ddw-extensions" does not have a value, and
+ * -EOVERFLOW if "ibm,ddw-extensions" does not contain this extension.
+ */
+static inline int ddw_read_ext(const struct device_node *np, int extnum,
+ u32 *value)
+{
+ static const char propname[] = "ibm,ddw-extensions";
+ u32 count;
+ int ret;
+
+ ret = of_property_read_u32_index(np, propname, DDW_EXT_SIZE, &count);
+ if (ret)
+ return ret;
+
+ if (count < extnum)
+ return -EOVERFLOW;
+
+ if (!value)
+ value = &count;
+
+ return of_property_read_u32_index(np, propname, extnum, value);
+}
+
static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
- struct ddw_query_response *query)
+ struct ddw_query_response *query,
+ struct device_node *parent)
{
struct device_node *dn;
struct pci_dn *pdn;
- u32 cfg_addr;
+ u32 cfg_addr, ext_query, query_out[5];
u64 buid;
- int ret;
+ int ret, out_sz;
+
+ /*
+ * From LoPAR level 2.8, "ibm,ddw-extensions" index 3 can rule how many
+ * output parameters ibm,query-pe-dma-windows will have, ranging from
+ * 5 to 6.
+ */
+ ret = ddw_read_ext(parent, DDW_EXT_QUERY_OUT_SIZE, &ext_query);
+ if (!ret && ext_query == 1)
+ out_sz = 6;
+ else
+ out_sz = 5;
/*
* Get the config address and phb buid of the PE window.
@@ -889,11 +960,28 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
buid = pdn->phb->buid;
cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));
- ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query,
- cfg_addr, BUID_HI(buid), BUID_LO(buid));
- dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x"
- " returned %d\n", ddw_avail[0], cfg_addr, BUID_HI(buid),
- BUID_LO(buid), ret);
+ ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, out_sz, query_out,
+ cfg_addr, BUID_HI(buid), BUID_LO(buid));
+ dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d\n",
+ ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
+ BUID_LO(buid), ret);
+
+ switch (out_sz) {
+ case 5:
+ query->windows_available = query_out[0];
+ query->largest_available_block = query_out[1];
+ query->page_size = query_out[2];
+ query->migration_capable = query_out[3];
+ break;
+ case 6:
+ query->windows_available = query_out[0];
+ query->largest_available_block = ((u64)query_out[1] << 32) |
+ query_out[2];
+ query->page_size = query_out[3];
+ query->migration_capable = query_out[4];
+ break;
+ }
+
return ret;
}
@@ -920,15 +1008,16 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail,
do {
/* extra outputs are LIOBN and dma-addr (hi, lo) */
- ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create,
- cfg_addr, BUID_HI(buid), BUID_LO(buid),
- page_shift, window_shift);
+ ret = rtas_call(ddw_avail[DDW_CREATE_PE_DMA_WIN], 5, 4,
+ (u32 *)create, cfg_addr, BUID_HI(buid),
+ BUID_LO(buid), page_shift, window_shift);
} while (rtas_busy_delay(ret));
dev_info(&dev->dev,
"ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d "
- "(liobn = 0x%x starting addr = %x %x)\n", ddw_avail[1],
- cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift,
- window_shift, ret, create->liobn, create->addr_hi, create->addr_lo);
+ "(liobn = 0x%x starting addr = %x %x)\n",
+ ddw_avail[DDW_CREATE_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
+ BUID_LO(buid), page_shift, window_shift, ret, create->liobn,
+ create->addr_hi, create->addr_lo);
return ret;
}
@@ -978,6 +1067,38 @@ static phys_addr_t ddw_memory_hotplug_max(void)
}
/*
+ * Platforms supporting the DDW option starting with LoPAR level 2.7 implement
+ * ibm,ddw-extensions, which carries the rtas token for
+ * ibm,reset-pe-dma-windows.
+ * That rtas-call can be used to restore the default DMA window for the device.
+ */
+static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn)
+{
+ int ret;
+ u32 cfg_addr, reset_dma_win;
+ u64 buid;
+ struct device_node *dn;
+ struct pci_dn *pdn;
+
+ ret = ddw_read_ext(par_dn, DDW_EXT_RESET_DMA_WIN, &reset_dma_win);
+ if (ret)
+ return;
+
+ dn = pci_device_to_OF_node(dev);
+ pdn = PCI_DN(dn);
+ buid = pdn->phb->buid;
+ cfg_addr = (pdn->busno << 16) | (pdn->devfn << 8);
+
+ ret = rtas_call(reset_dma_win, 3, 1, NULL, cfg_addr, BUID_HI(buid),
+ BUID_LO(buid));
+ if (ret)
+ dev_info(&dev->dev,
+ "ibm,reset-pe-dma-windows(%x) %x %x %x returned %d ",
+ reset_dma_win, cfg_addr, BUID_HI(buid), BUID_LO(buid),
+ ret);
+}
+
+/*
* If the PE supports dynamic dma windows, and there is space for a table
* that can map all pages in a linear offset, then setup such a table,
* and record the dma-offset in the struct device.
@@ -996,11 +1117,12 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
int page_shift;
u64 dma_addr, max_addr;
struct device_node *dn;
- u32 ddw_avail[3];
+ u32 ddw_avail[DDW_APPLICABLE_SIZE];
struct direct_window *window;
struct property *win64;
struct dynamic_dma_window_prop *ddwprop;
struct failed_ddw_pdn *fpdn;
+ bool default_win_removed = false;
mutex_lock(&direct_window_init_mutex);
@@ -1029,7 +1151,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
* the property is actually in the parent, not the PE
*/
ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable",
- &ddw_avail[0], 3);
+ &ddw_avail[0], DDW_APPLICABLE_SIZE);
if (ret)
goto out_failed;
@@ -1040,18 +1162,42 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
* of page sizes: supported and supported for migrate-dma.
*/
dn = pci_device_to_OF_node(dev);
- ret = query_ddw(dev, ddw_avail, &query);
+ ret = query_ddw(dev, ddw_avail, &query, pdn);
if (ret != 0)
goto out_failed;
+ /*
+ * If there is no window available, remove the default DMA window,
+ * if it's present. This will make all the resources available to the
+ * new DDW window.
+ * If anything fails after this, we need to restore it, so also check
+ * for extensions presence.
+ */
if (query.windows_available == 0) {
- /*
- * no additional windows are available for this device.
- * We might be able to reallocate the existing window,
- * trading in for a larger page size.
- */
- dev_dbg(&dev->dev, "no free dynamic windows");
- goto out_failed;
+ struct property *default_win;
+ int reset_win_ext;
+
+ default_win = of_find_property(pdn, "ibm,dma-window", NULL);
+ if (!default_win)
+ goto out_failed;
+
+ reset_win_ext = ddw_read_ext(pdn, DDW_EXT_RESET_DMA_WIN, NULL);
+ if (reset_win_ext)
+ goto out_failed;
+
+ remove_dma_window(pdn, ddw_avail, default_win);
+ default_win_removed = true;
+
+ /* Query again, to check if the window is available */
+ ret = query_ddw(dev, ddw_avail, &query, pdn);
+ if (ret != 0)
+ goto out_failed;
+
+ if (query.windows_available == 0) {
+ /* no windows are available for this device. */
+ dev_dbg(&dev->dev, "no free dynamic windows");
+ goto out_failed;
+ }
}
if (query.page_size & 4) {
page_shift = 24; /* 16MB */
@@ -1068,7 +1214,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
/* check largest block * page size > max memory hotplug addr */
max_addr = ddw_memory_hotplug_max();
if (query.largest_available_block < (max_addr >> page_shift)) {
- dev_dbg(&dev->dev, "can't map partition max 0x%llx with %u "
+ dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu "
"%llu-sized pages\n", max_addr, query.largest_available_block,
1ULL << page_shift);
goto out_failed;
@@ -1142,6 +1288,8 @@ out_free_prop:
kfree(win64);
out_failed:
+ if (default_win_removed)
+ reset_dma_window(dev, pdn);
fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL);
if (!fpdn)
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index baf24eacd268..764170fdb0f7 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -1724,6 +1724,7 @@ void __init hpte_init_pseries(void)
pseries_lpar_register_process_table(0, 0, 0);
}
+#ifdef CONFIG_PPC_RADIX_MMU
void radix_init_pseries(void)
{
pr_info("Using radix MMU under hypervisor\n");
@@ -1731,6 +1732,7 @@ void radix_init_pseries(void)
pseries_lpar_register_process_table(__pa(process_tb),
0, PRTB_SIZE_SHIFT - 12);
}
+#endif
#ifdef CONFIG_PPC_SMLPAR
#define CMO_FREE_HINT_DEFAULT 1
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index b8d28ab88178..e278390ab28d 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -136,6 +136,39 @@ static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data)
return rc;
}
+static void show_gpci_data(struct seq_file *m)
+{
+ struct hv_gpci_request_buffer *buf;
+ unsigned int affinity_score;
+ long ret;
+
+ buf = kmalloc(sizeof(*buf), GFP_KERNEL);
+ if (buf == NULL)
+ return;
+
+ /*
+ * Show the local LPAR's affinity score.
+ *
+ * 0xB1 selects the Affinity_Domain_Info_By_Partition subcall.
+ * The score is at byte 0xB in the output buffer.
+ */
+ memset(&buf->params, 0, sizeof(buf->params));
+ buf->params.counter_request = cpu_to_be32(0xB1);
+ buf->params.starting_index = cpu_to_be32(-1); /* local LPAR */
+ buf->params.counter_info_version_in = 0x5; /* v5+ for score */
+ ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, virt_to_phys(buf),
+ sizeof(*buf));
+ if (ret != H_SUCCESS) {
+ pr_debug("hcall failed: H_GET_PERF_COUNTER_INFO: %ld, %x\n",
+ ret, be32_to_cpu(buf->params.detail_rc));
+ goto out;
+ }
+ affinity_score = buf->bytes[0xB];
+ seq_printf(m, "partition_affinity_score=%u\n", affinity_score);
+out:
+ kfree(buf);
+}
+
static unsigned h_pic(unsigned long *pool_idle_time,
unsigned long *num_procs)
{
@@ -487,6 +520,8 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
partition_active_processors * 100);
}
+ show_gpci_data(m);
+
seq_printf(m, "partition_active_processors=%d\n",
partition_active_processors);
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index a88a707a608a..835163f54244 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -785,7 +785,8 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
static ssize_t perf_stats_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- int index, rc;
+ int index;
+ ssize_t rc;
struct seq_buf s;
struct papr_scm_perf_stat *stat;
struct papr_scm_perf_stats *stats;
@@ -820,9 +821,9 @@ static ssize_t perf_stats_show(struct device *dev,
free_stats:
kfree(stats);
- return rc ? rc : seq_buf_used(&s);
+ return rc ? rc : (ssize_t)seq_buf_used(&s);
}
-DEVICE_ATTR_ADMIN_RO(perf_stats);
+static DEVICE_ATTR_ADMIN_RO(perf_stats);
static ssize_t flags_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -897,6 +898,9 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
p->bus_desc.of_node = p->pdev->dev.of_node;
p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL);
+ /* Set the dimm command family mask to accept PDSMs */
+ set_bit(NVDIMM_FAMILY_PAPR, &p->bus_desc.dimm_family_mask);
+
if (!p->bus_desc.provider_name)
return -ENOMEM;
diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c
index bbb97169bf63..6268545947b8 100644
--- a/arch/powerpc/platforms/pseries/rng.c
+++ b/arch/powerpc/platforms/pseries/rng.c
@@ -36,6 +36,7 @@ static __init int rng_init(void)
ppc_md.get_random_seed = pseries_get_random_long;
+ of_node_put(dn);
return 0;
}
machine_subsys_initcall(pseries, rng_init);
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 2f4ee0a90284..633c45ec406d 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -519,9 +519,15 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
if (result->character & H_CPU_CHAR_BCCTR_FLUSH_ASSIST)
security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST);
+ if (result->character & H_CPU_CHAR_BCCTR_LINK_FLUSH_ASSIST)
+ security_ftr_set(SEC_FTR_BCCTR_LINK_FLUSH_ASSIST);
+
if (result->behaviour & H_CPU_BEHAV_FLUSH_COUNT_CACHE)
security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE);
+ if (result->behaviour & H_CPU_BEHAV_FLUSH_LINK_STACK)
+ security_ftr_set(SEC_FTR_FLUSH_LINK_STACK);
+
/*
* The features below are enabled by default, so we instead look to see
* if firmware has *disabled* them, and clear them if so.
diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c
index e6d7a344d9f2..7b739cc7a8a9 100644
--- a/arch/powerpc/platforms/pseries/svm.c
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -7,6 +7,7 @@
*/
#include <linux/mm.h>
+#include <linux/memblock.h>
#include <asm/machdep.h>
#include <asm/svm.h>
#include <asm/swiotlb.h>
@@ -35,6 +36,31 @@ static int __init init_svm(void)
}
machine_early_initcall(pseries, init_svm);
+/*
+ * Initialize SWIOTLB. Essentially the same as swiotlb_init(), except that it
+ * can allocate the buffer anywhere in memory. Since the hypervisor doesn't have
+ * any addressing limitation, we don't need to allocate it in low addresses.
+ */
+void __init svm_swiotlb_init(void)
+{
+ unsigned char *vstart;
+ unsigned long bytes, io_tlb_nslabs;
+
+ io_tlb_nslabs = (swiotlb_size_or_default() >> IO_TLB_SHIFT);
+ io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+
+ bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+
+ vstart = memblock_alloc(PAGE_ALIGN(bytes), PAGE_SIZE);
+ if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, false))
+ return;
+
+ if (io_tlb_start)
+ memblock_free_early(io_tlb_start,
+ PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+ panic("SVM: Cannot allocate SWIOTLB buffer");
+}
+
int set_memory_encrypted(unsigned long addr, int numpages)
{
if (!PAGE_ALIGNED(addr))
diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c
index ad8117148ea3..21b9d1bf39ff 100644
--- a/arch/powerpc/sysdev/xics/icp-hv.c
+++ b/arch/powerpc/sysdev/xics/icp-hv.c
@@ -174,6 +174,7 @@ int icp_hv_init(void)
icp_ops = &icp_hv_ops;
+ of_node_put(np);
return 0;
}
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index f591be9f01f4..a80440af491a 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -1565,7 +1565,7 @@ static int __init xive_off(char *arg)
}
__setup("xive=off", xive_off);
-void xive_debug_show_cpu(struct seq_file *m, int cpu)
+static void xive_debug_show_cpu(struct seq_file *m, int cpu)
{
struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
@@ -1599,7 +1599,7 @@ void xive_debug_show_cpu(struct seq_file *m, int cpu)
seq_puts(m, "\n");
}
-void xive_debug_show_irq(struct seq_file *m, u32 hw_irq, struct irq_data *d)
+static void xive_debug_show_irq(struct seq_file *m, u32 hw_irq, struct irq_data *d)
{
struct irq_chip *chip = irq_data_get_irq_chip(d);
int rc;
diff --git a/arch/powerpc/tools/checkpatch.sh b/arch/powerpc/tools/checkpatch.sh
index 3ce5c093b19d..91c04802ec31 100755
--- a/arch/powerpc/tools/checkpatch.sh
+++ b/arch/powerpc/tools/checkpatch.sh
@@ -9,7 +9,6 @@ script_base=$(realpath $(dirname $0))
exec $script_base/../../../scripts/checkpatch.pl \
--subjective \
--no-summary \
- --max-line-length=90 \
--show-types \
--ignore ARCH_INCLUDE_LINUX \
--ignore BIT_MACRO \
diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh
index 6e6a30aea3ed..8301efee1e6c 100755
--- a/arch/powerpc/tools/unrel_branch_check.sh
+++ b/arch/powerpc/tools/unrel_branch_check.sh
@@ -1,60 +1,79 @@
-# Copyright © 2016 IBM Corporation
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+# Copyright © 2016,2020 IBM Corporation
#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version
-# 2 of the License, or (at your option) any later version.
-#
-# This script checks the relocations of a vmlinux for "suspicious"
-# branches from unrelocated code (head_64.S code).
-
-# Turn this on if you want more debug output:
-# set -x
+# This script checks the unrelocated code of a vmlinux for "suspicious"
+# branches to relocated code (head_64.S code).
-# Have Kbuild supply the path to objdump so we handle cross compilation.
+# Have Kbuild supply the path to objdump and nm so we handle cross compilation.
objdump="$1"
-vmlinux="$2"
-
-#__end_interrupts should be located within the first 64K
-
-end_intr=0x$(
-$objdump -R "$vmlinux" -d --start-address=0xc000000000000000 \
- --stop-address=0xc000000000010000 |
-grep '\<__end_interrupts>:' |
-awk '{print $1}'
-)
-
-BRANCHES=$(
-$objdump -R "$vmlinux" -D --start-address=0xc000000000000000 \
- --stop-address=${end_intr} |
-grep -e "^c[0-9a-f]*:[[:space:]]*\([0-9a-f][0-9a-f][[:space:]]\)\{4\}[[:space:]]*b" |
-grep -v '\<__start_initialization_multiplatform>' |
-grep -v -e 'b.\?.\?ctr' |
-grep -v -e 'b.\?.\?lr' |
-sed -e 's/\bbt.\?[[:space:]]*[[:digit:]][[:digit:]]*,/beq/' \
- -e 's/\bbf.\?[[:space:]]*[[:digit:]][[:digit:]]*,/bne/' \
- -e 's/[[:space:]]0x/ /' \
- -e 's/://' |
-awk '{ print $1 ":" $6 ":0x" $7 ":" $8 " "}'
-)
-
-for tuple in $BRANCHES
-do
- from=`echo $tuple | cut -d':' -f1`
- branch=`echo $tuple | cut -d':' -f2`
- to=`echo $tuple | cut -d':' -f3 | sed 's/cr[0-7],//'`
- sym=`echo $tuple | cut -d':' -f4`
-
- if (( $to > $end_intr ))
- then
- if [ -z "$bad_branches" ]; then
- echo "WARNING: Unrelocated relative branches"
- bad_branches="yes"
+nm="$2"
+vmlinux="$3"
+
+kstart=0xc000000000000000
+
+end_intr=0x$($nm -p "$vmlinux" |
+ sed -E -n '/\s+[[:alpha:]]\s+__end_interrupts\s*$/{s///p;q}')
+if [ "$end_intr" = "0x" ]; then
+ exit 0
+fi
+
+# we know that there is a correct branch to
+# __start_initialization_multiplatform, so find its address
+# so we can exclude it.
+sim=0x$($nm -p "$vmlinux" |
+ sed -E -n '/\s+[[:alpha:]]\s+__start_initialization_multiplatform\s*$/{s///p;q}')
+
+$objdump -D --no-show-raw-insn --start-address="$kstart" --stop-address="$end_intr" "$vmlinux" |
+sed -E -n '
+# match lines that start with a kernel address
+/^c[0-9a-f]*:\s*b/ {
+ # drop branches via ctr or lr
+ /\<b.?.?(ct|l)r/d
+ # cope with some differences between Clang and GNU objdumps
+ s/\<bt.?\s*[[:digit:]]+,/beq/
+ s/\<bf.?\s*[[:digit:]]+,/bne/
+ # tidy up
+ s/\s0x/ /
+ s/://
+ # format for the loop below
+ s/^(\S+)\s+(\S+)\s+(\S+)\s*(\S*).*$/\1:\2:\3:\4/
+ # strip out condition registers
+ s/:cr[0-7],/:/
+ p
+}' | {
+
+all_good=true
+while IFS=: read -r from branch to sym; do
+ case "$to" in
+ c*) to="0x$to"
+ ;;
+ .+*)
+ to=${to#.+}
+ if [ "$branch" = 'b' ]; then
+ if (( to >= 0x2000000 )); then
+ to=$(( to - 0x4000000 ))
+ fi
+ elif (( to >= 0x8000 )); then
+ to=$(( to - 0x10000 ))
+ fi
+ printf -v to '0x%x' $(( "0x$from" + to ))
+ ;;
+ *) printf 'Unkown branch format\n'
+ ;;
+ esac
+ if [ "$to" = "$sim" ]; then
+ continue
+ fi
+ if (( to > end_intr )); then
+ if $all_good; then
+ printf '%s\n' 'WARNING: Unrelocated relative branches'
+ all_good=false
fi
- echo "$from $branch-> $to $sym"
+ printf '%s %s-> %s %s\n' "$from" "$branch" "$to" "$sym"
fi
done
-if [ -z "$bad_branches" ]; then
- exit 0
-fi
+$all_good
+
+}
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index df7bca00f5ec..55c43a6c9111 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -969,6 +969,7 @@ static void insert_cpu_bpts(void)
brk.address = dabr[i].address;
brk.type = (dabr[i].enabled & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
brk.len = 8;
+ brk.hw_len = 8;
__set_breakpoint(i, &brk);
}
}
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index b7821ac36d28..483fc555fc34 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -38,6 +38,7 @@ config RISCV
select GENERIC_ARCH_TOPOLOGY if SMP
select GENERIC_ATOMIC64 if !64BIT
select GENERIC_CLOCKEVENTS
+ select GENERIC_EARLY_IOREMAP
select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO
select GENERIC_IOREMAP
select GENERIC_IRQ_MULTI_HANDLER
@@ -388,6 +389,28 @@ config CMDLINE_FORCE
endchoice
+config EFI_STUB
+ bool
+
+config EFI
+ bool "UEFI runtime support"
+ depends on OF
+ select LIBFDT
+ select UCS2_STRING
+ select EFI_PARAMS_FROM_FDT
+ select EFI_STUB
+ select EFI_GENERIC_STUB
+ select EFI_RUNTIME_WRAPPERS
+ select RISCV_ISA_C
+ depends on MMU
+ default y
+ help
+ This option provides support for runtime services provided
+ by UEFI firmware (such as non-volatile variables, realtime
+ clock, and platform reset). A UEFI stub is also provided to
+ allow the kernel to be booted as an EFI application. This
+ is only useful on systems that have UEFI firmware.
+
endmenu
config BUILTIN_DTB
@@ -400,3 +423,5 @@ menu "Power management options"
source "kernel/power/Kconfig"
endmenu
+
+source "drivers/firmware/Kconfig"
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index fb6e37db836d..10df59f28add 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -80,6 +80,7 @@ head-y := arch/riscv/kernel/head.o
core-y += arch/riscv/
libs-y += arch/riscv/lib/
+libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
PHONY += vdso_install
vdso_install:
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index d58c93efb603..d222d353d86d 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -130,3 +130,4 @@ CONFIG_DEBUG_BLOCK_EXT_DEVT=y
# CONFIG_RUNTIME_TESTING_MENU is not set
CONFIG_MEMTEST=y
# CONFIG_SYSFS_SYSCALL is not set
+CONFIG_EFI=y
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index 3d9410bb4de0..59dd7be55005 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
+generic-y += early_ioremap.h
generic-y += extable.h
generic-y += flat.h
generic-y += kvm_para.h
diff --git a/arch/riscv/include/asm/cacheinfo.h b/arch/riscv/include/asm/cacheinfo.h
index 5d9662e9aba8..d1a365215ec0 100644
--- a/arch/riscv/include/asm/cacheinfo.h
+++ b/arch/riscv/include/asm/cacheinfo.h
@@ -1,4 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 SiFive
+ */
#ifndef _ASM_RISCV_CACHEINFO_H
#define _ASM_RISCV_CACHEINFO_H
@@ -11,5 +14,7 @@ struct riscv_cacheinfo_ops {
};
void riscv_set_cacheinfo_ops(struct riscv_cacheinfo_ops *ops);
+uintptr_t get_cache_size(u32 level, enum cache_type type);
+uintptr_t get_cache_geometry(u32 level, enum cache_type type);
#endif /* _ASM_RISCV_CACHEINFO_H */
diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h
new file mode 100644
index 000000000000..7542282f1141
--- /dev/null
+++ b/arch/riscv/include/asm/efi.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+#ifndef _ASM_EFI_H
+#define _ASM_EFI_H
+
+#include <asm/csr.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/ptrace.h>
+#include <asm/tlbflush.h>
+
+#ifdef CONFIG_EFI
+extern void efi_init(void);
+#else
+#define efi_init()
+#endif
+
+int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
+int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
+
+#define arch_efi_call_virt_setup() efi_virtmap_load()
+#define arch_efi_call_virt_teardown() efi_virtmap_unload()
+
+#define arch_efi_call_virt(p, f, args...) p->f(args)
+
+#define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE)
+
+/* on RISC-V, the FDT may be located anywhere in system RAM */
+static inline unsigned long efi_get_max_fdt_addr(unsigned long image_addr)
+{
+ return ULONG_MAX;
+}
+
+/* Load initrd at enough distance from DRAM start */
+static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr)
+{
+ return image_addr + SZ_256M;
+}
+
+#define alloc_screen_info(x...) (&screen_info)
+
+static inline void free_screen_info(struct screen_info *si)
+{
+}
+
+static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt)
+{
+}
+
+void efi_virtmap_load(void);
+void efi_virtmap_unload(void);
+
+#endif /* _ASM_EFI_H */
diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h
index d83a4efd052b..5c725e1df58b 100644
--- a/arch/riscv/include/asm/elf.h
+++ b/arch/riscv/include/asm/elf.h
@@ -11,6 +11,7 @@
#include <uapi/asm/elf.h>
#include <asm/auxvec.h>
#include <asm/byteorder.h>
+#include <asm/cacheinfo.h>
/*
* These are used to set parameters in the core dumps.
@@ -61,6 +62,18 @@ extern unsigned long elf_hwcap;
do { \
NEW_AUX_ENT(AT_SYSINFO_EHDR, \
(elf_addr_t)current->mm->context.vdso); \
+ NEW_AUX_ENT(AT_L1I_CACHESIZE, \
+ get_cache_size(1, CACHE_TYPE_INST)); \
+ NEW_AUX_ENT(AT_L1I_CACHEGEOMETRY, \
+ get_cache_geometry(1, CACHE_TYPE_INST)); \
+ NEW_AUX_ENT(AT_L1D_CACHESIZE, \
+ get_cache_size(1, CACHE_TYPE_DATA)); \
+ NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, \
+ get_cache_geometry(1, CACHE_TYPE_DATA)); \
+ NEW_AUX_ENT(AT_L2_CACHESIZE, \
+ get_cache_size(2, CACHE_TYPE_UNIFIED)); \
+ NEW_AUX_ENT(AT_L2_CACHEGEOMETRY, \
+ get_cache_geometry(2, CACHE_TYPE_UNIFIED)); \
} while (0)
#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
struct linux_binprm;
diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
index 1ff075a8dfc7..54cbf07fb4e9 100644
--- a/arch/riscv/include/asm/fixmap.h
+++ b/arch/riscv/include/asm/fixmap.h
@@ -22,14 +22,24 @@
*/
enum fixed_addresses {
FIX_HOLE,
-#define FIX_FDT_SIZE SZ_1M
- FIX_FDT_END,
- FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,
FIX_PTE,
FIX_PMD,
FIX_TEXT_POKE1,
FIX_TEXT_POKE0,
FIX_EARLYCON_MEM_BASE,
+
+ __end_of_permanent_fixed_addresses,
+ /*
+ * Temporary boot-time mappings, used by early_ioremap(),
+ * before ioremap() is functional.
+ */
+#define NR_FIX_BTMAPS (SZ_256K / PAGE_SIZE)
+#define FIX_BTMAPS_SLOTS 7
+#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
+
+ FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
+ FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
+
__end_of_fixed_addresses
};
diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h
index 3835c3295dc5..c025a746a148 100644
--- a/arch/riscv/include/asm/io.h
+++ b/arch/riscv/include/asm/io.h
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/pgtable.h>
#include <asm/mmiowb.h>
+#include <asm/early_ioremap.h>
/*
* MMIO access functions are separated out to break dependency cycles
diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h
index 967eacb01ab5..dabcf2cfb3dc 100644
--- a/arch/riscv/include/asm/mmu.h
+++ b/arch/riscv/include/asm/mmu.h
@@ -20,6 +20,8 @@ typedef struct {
#endif
} mm_context_t;
+void __init create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa,
+ phys_addr_t sz, pgprot_t prot);
#endif /* __ASSEMBLY__ */
#endif /* _ASM_RISCV_MMU_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index eaea1f717010..183f1f4b2ae6 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -100,6 +100,10 @@
#define PAGE_KERNEL __pgprot(_PAGE_KERNEL)
#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL | _PAGE_EXEC)
+#define PAGE_KERNEL_READ __pgprot(_PAGE_KERNEL & ~_PAGE_WRITE)
+#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL | _PAGE_EXEC)
+#define PAGE_KERNEL_READ_EXEC __pgprot((_PAGE_KERNEL & ~_PAGE_WRITE) \
+ | _PAGE_EXEC)
#define PAGE_TABLE __pgprot(_PAGE_TABLE)
@@ -464,6 +468,7 @@ static inline void __kernel_map_pages(struct page *page, int numpages, int enabl
#define kern_addr_valid(addr) (1) /* FIXME */
extern void *dtb_early_va;
+extern uintptr_t dtb_early_pa;
void setup_bootmem(void);
void paging_init(void);
diff --git a/arch/riscv/include/asm/sections.h b/arch/riscv/include/asm/sections.h
new file mode 100644
index 000000000000..3a9971b1210f
--- /dev/null
+++ b/arch/riscv/include/asm/sections.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+#ifndef __ASM_SECTIONS_H
+#define __ASM_SECTIONS_H
+
+#include <asm-generic/sections.h>
+
+extern char _start[];
+extern char _start_kernel[];
+
+#endif /* __ASM_SECTIONS_H */
diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h
index d86cb17bbabe..32c73ba1d531 100644
--- a/arch/riscv/include/uapi/asm/auxvec.h
+++ b/arch/riscv/include/uapi/asm/auxvec.h
@@ -10,4 +10,28 @@
/* vDSO location */
#define AT_SYSINFO_EHDR 33
+/*
+ * The set of entries below represent more extensive information
+ * about the caches, in the form of two entry per cache type,
+ * one entry containing the cache size in bytes, and the other
+ * containing the cache line size in bytes in the bottom 16 bits
+ * and the cache associativity in the next 16 bits.
+ *
+ * The associativity is such that if N is the 16-bit value, the
+ * cache is N way set associative. A value if 0xffff means fully
+ * associative, a value of 1 means directly mapped.
+ *
+ * For all these fields, a value of 0 means that the information
+ * is not known.
+ */
+#define AT_L1I_CACHESIZE 40
+#define AT_L1I_CACHEGEOMETRY 41
+#define AT_L1D_CACHESIZE 42
+#define AT_L1D_CACHEGEOMETRY 43
+#define AT_L2_CACHESIZE 44
+#define AT_L2_CACHEGEOMETRY 45
+
+/* entries in ARCH_DLINFO */
+#define AT_VECTOR_SIZE_ARCH 7
+
#endif /* _UAPI_ASM_RISCV_AUXVEC_H */
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index dc93710f0b2f..fa896c5f7ccb 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -55,4 +55,6 @@ obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
+obj-$(CONFIG_EFI) += efi.o
+
clean:
diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c
index bd0f122965c3..de59dd457b41 100644
--- a/arch/riscv/kernel/cacheinfo.c
+++ b/arch/riscv/kernel/cacheinfo.c
@@ -3,7 +3,6 @@
* Copyright (C) 2017 SiFive
*/
-#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/of.h>
#include <linux/of_device.h>
@@ -25,12 +24,84 @@ cache_get_priv_group(struct cacheinfo *this_leaf)
return NULL;
}
-static void ci_leaf_init(struct cacheinfo *this_leaf,
- struct device_node *node,
- enum cache_type type, unsigned int level)
+static struct cacheinfo *get_cacheinfo(u32 level, enum cache_type type)
+{
+ struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(smp_processor_id());
+ struct cacheinfo *this_leaf;
+ int index;
+
+ for (index = 0; index < this_cpu_ci->num_leaves; index++) {
+ this_leaf = this_cpu_ci->info_list + index;
+ if (this_leaf->level == level && this_leaf->type == type)
+ return this_leaf;
+ }
+
+ return NULL;
+}
+
+uintptr_t get_cache_size(u32 level, enum cache_type type)
+{
+ struct cacheinfo *this_leaf = get_cacheinfo(level, type);
+
+ return this_leaf ? this_leaf->size : 0;
+}
+
+uintptr_t get_cache_geometry(u32 level, enum cache_type type)
+{
+ struct cacheinfo *this_leaf = get_cacheinfo(level, type);
+
+ return this_leaf ? (this_leaf->ways_of_associativity << 16 |
+ this_leaf->coherency_line_size) :
+ 0;
+}
+
+static void ci_leaf_init(struct cacheinfo *this_leaf, enum cache_type type,
+ unsigned int level, unsigned int size,
+ unsigned int sets, unsigned int line_size)
{
this_leaf->level = level;
this_leaf->type = type;
+ this_leaf->size = size;
+ this_leaf->number_of_sets = sets;
+ this_leaf->coherency_line_size = line_size;
+
+ /*
+ * If the cache is fully associative, there is no need to
+ * check the other properties.
+ */
+ if (sets == 1)
+ return;
+
+ /*
+ * Set the ways number for n-ways associative, make sure
+ * all properties are big than zero.
+ */
+ if (sets > 0 && size > 0 && line_size > 0)
+ this_leaf->ways_of_associativity = (size / sets) / line_size;
+}
+
+static void fill_cacheinfo(struct cacheinfo **this_leaf,
+ struct device_node *node, unsigned int level)
+{
+ unsigned int size, sets, line_size;
+
+ if (!of_property_read_u32(node, "cache-size", &size) &&
+ !of_property_read_u32(node, "cache-block-size", &line_size) &&
+ !of_property_read_u32(node, "cache-sets", &sets)) {
+ ci_leaf_init((*this_leaf)++, CACHE_TYPE_UNIFIED, level, size, sets, line_size);
+ }
+
+ if (!of_property_read_u32(node, "i-cache-size", &size) &&
+ !of_property_read_u32(node, "i-cache-sets", &sets) &&
+ !of_property_read_u32(node, "i-cache-block-size", &line_size)) {
+ ci_leaf_init((*this_leaf)++, CACHE_TYPE_INST, level, size, sets, line_size);
+ }
+
+ if (!of_property_read_u32(node, "d-cache-size", &size) &&
+ !of_property_read_u32(node, "d-cache-sets", &sets) &&
+ !of_property_read_u32(node, "d-cache-block-size", &line_size)) {
+ ci_leaf_init((*this_leaf)++, CACHE_TYPE_DATA, level, size, sets, line_size);
+ }
}
static int __init_cache_level(unsigned int cpu)
@@ -83,29 +154,24 @@ static int __populate_cache_leaves(unsigned int cpu)
struct device_node *prev = NULL;
int levels = 1, level = 1;
- if (of_property_read_bool(np, "cache-size"))
- ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level);
- if (of_property_read_bool(np, "i-cache-size"))
- ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level);
- if (of_property_read_bool(np, "d-cache-size"))
- ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level);
+ /* Level 1 caches in cpu node */
+ fill_cacheinfo(&this_leaf, np, level);
+ /* Next level caches in cache nodes */
prev = np;
while ((np = of_find_next_cache_node(np))) {
of_node_put(prev);
prev = np;
+
if (!of_device_is_compatible(np, "cache"))
break;
if (of_property_read_u32(np, "cache-level", &level))
break;
if (level <= levels)
break;
- if (of_property_read_bool(np, "cache-size"))
- ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level);
- if (of_property_read_bool(np, "i-cache-size"))
- ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level);
- if (of_property_read_bool(np, "d-cache-size"))
- ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level);
+
+ fill_cacheinfo(&this_leaf, np, level);
+
levels = level;
}
of_node_put(np);
diff --git a/arch/riscv/kernel/efi-header.S b/arch/riscv/kernel/efi-header.S
new file mode 100644
index 000000000000..8e733aa48ba6
--- /dev/null
+++ b/arch/riscv/kernel/efi-header.S
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ * Adapted from arch/arm64/kernel/efi-header.S
+ */
+
+#include <linux/pe.h>
+#include <linux/sizes.h>
+
+ .macro __EFI_PE_HEADER
+ .long PE_MAGIC
+coff_header:
+#ifdef CONFIG_64BIT
+ .short IMAGE_FILE_MACHINE_RISCV64 // Machine
+#else
+ .short IMAGE_FILE_MACHINE_RISCV32 // Machine
+#endif
+ .short section_count // NumberOfSections
+ .long 0 // TimeDateStamp
+ .long 0 // PointerToSymbolTable
+ .long 0 // NumberOfSymbols
+ .short section_table - optional_header // SizeOfOptionalHeader
+ .short IMAGE_FILE_DEBUG_STRIPPED | \
+ IMAGE_FILE_EXECUTABLE_IMAGE | \
+ IMAGE_FILE_LINE_NUMS_STRIPPED // Characteristics
+
+optional_header:
+#ifdef CONFIG_64BIT
+ .short PE_OPT_MAGIC_PE32PLUS // PE32+ format
+#else
+ .short PE_OPT_MAGIC_PE32 // PE32 format
+#endif
+ .byte 0x02 // MajorLinkerVersion
+ .byte 0x14 // MinorLinkerVersion
+ .long __pecoff_text_end - efi_header_end // SizeOfCode
+ .long __pecoff_data_virt_size // SizeOfInitializedData
+ .long 0 // SizeOfUninitializedData
+ .long __efistub_efi_pe_entry - _start // AddressOfEntryPoint
+ .long efi_header_end - _start // BaseOfCode
+#ifdef CONFIG_32BIT
+ .long __pecoff_text_end - _start // BaseOfData
+#endif
+
+extra_header_fields:
+ .quad 0 // ImageBase
+ .long PECOFF_SECTION_ALIGNMENT // SectionAlignment
+ .long PECOFF_FILE_ALIGNMENT // FileAlignment
+ .short 0 // MajorOperatingSystemVersion
+ .short 0 // MinorOperatingSystemVersion
+ .short LINUX_EFISTUB_MAJOR_VERSION // MajorImageVersion
+ .short LINUX_EFISTUB_MINOR_VERSION // MinorImageVersion
+ .short 0 // MajorSubsystemVersion
+ .short 0 // MinorSubsystemVersion
+ .long 0 // Win32VersionValue
+
+ .long _end - _start // SizeOfImage
+
+ // Everything before the kernel image is considered part of the header
+ .long efi_header_end - _start // SizeOfHeaders
+ .long 0 // CheckSum
+ .short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem
+ .short 0 // DllCharacteristics
+ .quad 0 // SizeOfStackReserve
+ .quad 0 // SizeOfStackCommit
+ .quad 0 // SizeOfHeapReserve
+ .quad 0 // SizeOfHeapCommit
+ .long 0 // LoaderFlags
+ .long (section_table - .) / 8 // NumberOfRvaAndSizes
+
+ .quad 0 // ExportTable
+ .quad 0 // ImportTable
+ .quad 0 // ResourceTable
+ .quad 0 // ExceptionTable
+ .quad 0 // CertificationTable
+ .quad 0 // BaseRelocationTable
+
+ // Section table
+section_table:
+ .ascii ".text\0\0\0"
+ .long __pecoff_text_end - efi_header_end // VirtualSize
+ .long efi_header_end - _start // VirtualAddress
+ .long __pecoff_text_end - efi_header_end // SizeOfRawData
+ .long efi_header_end - _start // PointerToRawData
+
+ .long 0 // PointerToRelocations
+ .long 0 // PointerToLineNumbers
+ .short 0 // NumberOfRelocations
+ .short 0 // NumberOfLineNumbers
+ .long IMAGE_SCN_CNT_CODE | \
+ IMAGE_SCN_MEM_READ | \
+ IMAGE_SCN_MEM_EXECUTE // Characteristics
+
+ .ascii ".data\0\0\0"
+ .long __pecoff_data_virt_size // VirtualSize
+ .long __pecoff_text_end - _start // VirtualAddress
+ .long __pecoff_data_raw_size // SizeOfRawData
+ .long __pecoff_text_end - _start // PointerToRawData
+
+ .long 0 // PointerToRelocations
+ .long 0 // PointerToLineNumbers
+ .short 0 // NumberOfRelocations
+ .short 0 // NumberOfLineNumbers
+ .long IMAGE_SCN_CNT_INITIALIZED_DATA | \
+ IMAGE_SCN_MEM_READ | \
+ IMAGE_SCN_MEM_WRITE // Characteristics
+
+ .set section_count, (. - section_table) / 40
+
+ .balign 0x1000
+efi_header_end:
+ .endm
diff --git a/arch/riscv/kernel/efi.c b/arch/riscv/kernel/efi.c
new file mode 100644
index 000000000000..024159298231
--- /dev/null
+++ b/arch/riscv/kernel/efi.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ * Adapted from arch/arm64/kernel/efi.c
+ */
+
+#include <linux/efi.h>
+#include <linux/init.h>
+
+#include <asm/efi.h>
+#include <asm/pgtable.h>
+#include <asm/pgtable-bits.h>
+
+/*
+ * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be
+ * executable, everything else can be mapped with the XN bits
+ * set. Also take the new (optional) RO/XP bits into account.
+ */
+static __init pgprot_t efimem_to_pgprot_map(efi_memory_desc_t *md)
+{
+ u64 attr = md->attribute;
+ u32 type = md->type;
+
+ if (type == EFI_MEMORY_MAPPED_IO)
+ return PAGE_KERNEL;
+
+ /* R-- */
+ if ((attr & (EFI_MEMORY_XP | EFI_MEMORY_RO)) ==
+ (EFI_MEMORY_XP | EFI_MEMORY_RO))
+ return PAGE_KERNEL_READ;
+
+ /* R-X */
+ if (attr & EFI_MEMORY_RO)
+ return PAGE_KERNEL_READ_EXEC;
+
+ /* RW- */
+ if (((attr & (EFI_MEMORY_RP | EFI_MEMORY_WP | EFI_MEMORY_XP)) ==
+ EFI_MEMORY_XP) ||
+ type != EFI_RUNTIME_SERVICES_CODE)
+ return PAGE_KERNEL;
+
+ /* RWX */
+ return PAGE_KERNEL_EXEC;
+}
+
+int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
+{
+ pgprot_t prot = __pgprot(pgprot_val(efimem_to_pgprot_map(md)) &
+ ~(_PAGE_GLOBAL));
+ int i;
+
+ /* RISC-V maps one page at a time */
+ for (i = 0; i < md->num_pages; i++)
+ create_pgd_mapping(mm->pgd, md->virt_addr + i * PAGE_SIZE,
+ md->phys_addr + i * PAGE_SIZE,
+ PAGE_SIZE, prot);
+ return 0;
+}
+
+static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
+{
+ efi_memory_desc_t *md = data;
+ pte_t pte = READ_ONCE(*ptep);
+ unsigned long val;
+
+ if (md->attribute & EFI_MEMORY_RO) {
+ val = pte_val(pte) & ~_PAGE_WRITE;
+ val = pte_val(pte) | _PAGE_READ;
+ pte = __pte(val);
+ }
+ if (md->attribute & EFI_MEMORY_XP) {
+ val = pte_val(pte) & ~_PAGE_EXEC;
+ pte = __pte(val);
+ }
+ set_pte(ptep, pte);
+
+ return 0;
+}
+
+int __init efi_set_mapping_permissions(struct mm_struct *mm,
+ efi_memory_desc_t *md)
+{
+ BUG_ON(md->type != EFI_RUNTIME_SERVICES_CODE &&
+ md->type != EFI_RUNTIME_SERVICES_DATA);
+
+ /*
+ * Calling apply_to_page_range() is only safe on regions that are
+ * guaranteed to be mapped down to pages. Since we are only called
+ * for regions that have been mapped using efi_create_mapping() above
+ * (and this is checked by the generic Memory Attributes table parsing
+ * routines), there is no need to check that again here.
+ */
+ return apply_to_page_range(mm, md->virt_addr,
+ md->num_pages << EFI_PAGE_SHIFT,
+ set_permissions, md);
+}
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 0a4e81b8dc79..11e2a4fe66e0 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -3,7 +3,6 @@
* Copyright (C) 2012 Regents of the University of California
*/
-#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/asm.h>
#include <linux/init.h>
@@ -13,6 +12,7 @@
#include <asm/csr.h>
#include <asm/hwcap.h>
#include <asm/image.h>
+#include "efi-header.S"
__HEAD
ENTRY(_start)
@@ -22,10 +22,18 @@ ENTRY(_start)
* Do not modify it without modifying the structure and all bootloaders
* that expects this header format!!
*/
+#ifdef CONFIG_EFI
+ /*
+ * This instruction decodes to "MZ" ASCII required by UEFI.
+ */
+ c.li s4,-13
+ j _start_kernel
+#else
/* jump to start kernel */
j _start_kernel
/* reserved */
.word 0
+#endif
.balign 8
#if __riscv_xlen == 64
/* Image load offset(2MB) from start of RAM */
@@ -43,7 +51,14 @@ ENTRY(_start)
.ascii RISCV_IMAGE_MAGIC
.balign 4
.ascii RISCV_IMAGE_MAGIC2
+#ifdef CONFIG_EFI
+ .word pe_head_start - _start
+pe_head_start:
+
+ __EFI_PE_HEADER
+#else
.word 0
+#endif
.align 2
#ifdef CONFIG_MMU
@@ -259,7 +274,6 @@ clear_bss_done:
#endif
/* Start the kernel */
call soc_early_init
- call parse_dtb
tail start_kernel
.Lsecondary_start:
diff --git a/arch/riscv/kernel/head.h b/arch/riscv/kernel/head.h
index 105fb0496b24..b48dda3d04f6 100644
--- a/arch/riscv/kernel/head.h
+++ b/arch/riscv/kernel/head.h
@@ -16,6 +16,4 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa);
extern void *__cpu_up_stack_pointer[];
extern void *__cpu_up_task_pointer[];
-void __init parse_dtb(void);
-
#endif /* __ASM_HEAD_H */
diff --git a/arch/riscv/kernel/image-vars.h b/arch/riscv/kernel/image-vars.h
new file mode 100644
index 000000000000..8c212efb37a6
--- /dev/null
+++ b/arch/riscv/kernel/image-vars.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ * Linker script variables to be set after section resolution, as
+ * ld.lld does not like variables assigned before SECTIONS is processed.
+ * Based on arch/arm64/kerne/image-vars.h
+ */
+#ifndef __RISCV_KERNEL_IMAGE_VARS_H
+#define __RISCV_KERNEL_IMAGE_VARS_H
+
+#ifndef LINKER_SCRIPT
+#error This file should only be included in vmlinux.lds.S
+#endif
+
+#ifdef CONFIG_EFI
+
+/*
+ * The EFI stub has its own symbol namespace prefixed by __efistub_, to
+ * isolate it from the kernel proper. The following symbols are legally
+ * accessed by the stub, so provide some aliases to make them accessible.
+ * Only include data symbols here, or text symbols of functions that are
+ * guaranteed to be safe when executed at another offset than they were
+ * linked at. The routines below are all implemented in assembler in a
+ * position independent manner
+ */
+__efistub_memcmp = memcmp;
+__efistub_memchr = memchr;
+__efistub_memcpy = memcpy;
+__efistub_memmove = memmove;
+__efistub_memset = memset;
+__efistub_strlen = strlen;
+__efistub_strnlen = strnlen;
+__efistub_strcmp = strcmp;
+__efistub_strncmp = strncmp;
+__efistub_strrchr = strrchr;
+
+#ifdef CONFIG_KASAN
+__efistub___memcpy = memcpy;
+__efistub___memmove = memmove;
+__efistub___memset = memset;
+#endif
+
+__efistub__start = _start;
+__efistub__start_kernel = _start_kernel;
+__efistub__end = _end;
+__efistub__edata = _edata;
+__efistub_screen_info = screen_info;
+
+#endif
+
+#endif /* __RISCV_KERNEL_IMAGE_VARS_H */
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 2c6dd329312b..4c96ac198e14 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -17,19 +17,22 @@
#include <linux/sched/task.h>
#include <linux/swiotlb.h>
#include <linux/smp.h>
+#include <linux/efi.h>
#include <asm/cpu_ops.h>
+#include <asm/early_ioremap.h>
#include <asm/setup.h>
#include <asm/sections.h>
#include <asm/sbi.h>
#include <asm/tlbflush.h>
#include <asm/thread_info.h>
#include <asm/kasan.h>
+#include <asm/efi.h>
#include "head.h"
-#ifdef CONFIG_DUMMY_CONSOLE
-struct screen_info screen_info = {
+#if defined(CONFIG_DUMMY_CONSOLE) || defined(CONFIG_EFI)
+struct screen_info screen_info __section(.data) = {
.orig_video_lines = 30,
.orig_video_cols = 80,
.orig_video_mode = 0,
@@ -48,8 +51,9 @@ atomic_t hart_lottery __section(.sdata);
unsigned long boot_cpu_hartid;
static DEFINE_PER_CPU(struct cpu, cpu_devices);
-void __init parse_dtb(void)
+static void __init parse_dtb(void)
{
+ /* Early scan of device tree from init memory */
if (early_init_dt_scan(dtb_early_va))
return;
@@ -62,6 +66,7 @@ void __init parse_dtb(void)
void __init setup_arch(char **cmdline_p)
{
+ parse_dtb();
init_mm.start_code = (unsigned long) _stext;
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
@@ -69,14 +74,19 @@ void __init setup_arch(char **cmdline_p)
*cmdline_p = boot_command_line;
+ early_ioremap_setup();
parse_early_param();
+ efi_init();
setup_bootmem();
paging_init();
#if IS_ENABLED(CONFIG_BUILTIN_DTB)
unflatten_and_copy_device_tree();
#else
- unflatten_device_tree();
+ if (early_init_dt_verify(__va(dtb_early_pa)))
+ unflatten_device_tree();
+ else
+ pr_err("No DTB found in kernel mappings\n");
#endif
#ifdef CONFIG_SWIOTLB
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index 67db80e12d1f..3ffbd6cbdb86 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -10,6 +10,7 @@
#include <asm/cache.h>
#include <asm/thread_info.h>
#include <asm/set_memory.h>
+#include "image-vars.h"
#include <linux/sizes.h>
OUTPUT_ARCH(riscv)
@@ -17,6 +18,9 @@ ENTRY(_start)
jiffies = jiffies_64;
+PECOFF_SECTION_ALIGNMENT = 0x1000;
+PECOFF_FILE_ALIGNMENT = 0x200;
+
SECTIONS
{
/* Beginning of code and text segment */
@@ -66,6 +70,11 @@ SECTIONS
_etext = .;
}
+#ifdef CONFIG_EFI
+ . = ALIGN(PECOFF_SECTION_ALIGNMENT);
+ __pecoff_text_end = .;
+#endif
+
INIT_DATA_SECTION(16)
/* Start of data section */
@@ -84,16 +93,26 @@ SECTIONS
.sdata : {
__global_pointer$ = . + 0x800;
*(.sdata*)
- /* End of data section */
- _edata = .;
}
+#ifdef CONFIG_EFI
+ .pecoff_edata_padding : { BYTE(0); . = ALIGN(PECOFF_FILE_ALIGNMENT); }
+ __pecoff_data_raw_size = ABSOLUTE(. - __pecoff_text_end);
+#endif
+
+ /* End of data section */
+ _edata = .;
+
BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)
.rel.dyn : {
*(.rel.dyn*)
}
+#ifdef CONFIG_EFI
+ . = ALIGN(PECOFF_SECTION_ALIGNMENT);
+ __pecoff_data_virt_size = ABSOLUTE(. - __pecoff_text_end);
+#endif
_end = .;
STABS_DEBUG
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 716d64e36f83..1359e21c0c62 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -19,6 +19,167 @@
#include "../kernel/head.h"
+static inline void no_context(struct pt_regs *regs, unsigned long addr)
+{
+ /* Are we prepared to handle this kernel fault? */
+ if (fixup_exception(regs))
+ return;
+
+ /*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+ */
+ bust_spinlocks(1);
+ pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n",
+ (addr < PAGE_SIZE) ? "NULL pointer dereference" :
+ "paging request", addr);
+ die(regs, "Oops");
+ do_exit(SIGKILL);
+}
+
+static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault)
+{
+ if (fault & VM_FAULT_OOM) {
+ /*
+ * We ran out of memory, call the OOM killer, and return the userspace
+ * (which will retry the fault, or kill us if we got oom-killed).
+ */
+ if (!user_mode(regs)) {
+ no_context(regs, addr);
+ return;
+ }
+ pagefault_out_of_memory();
+ return;
+ } else if (fault & VM_FAULT_SIGBUS) {
+ /* Kernel mode? Handle exceptions or die */
+ if (!user_mode(regs)) {
+ no_context(regs, addr);
+ return;
+ }
+ do_trap(regs, SIGBUS, BUS_ADRERR, addr);
+ return;
+ }
+ BUG();
+}
+
+static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr)
+{
+ /*
+ * Something tried to access memory that isn't in our memory map.
+ * Fix it, but check if it's kernel or user first.
+ */
+ mmap_read_unlock(mm);
+ /* User mode accesses just cause a SIGSEGV */
+ if (user_mode(regs)) {
+ do_trap(regs, SIGSEGV, code, addr);
+ return;
+ }
+
+ no_context(regs, addr);
+}
+
+static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr)
+{
+ pgd_t *pgd, *pgd_k;
+ pud_t *pud, *pud_k;
+ p4d_t *p4d, *p4d_k;
+ pmd_t *pmd, *pmd_k;
+ pte_t *pte_k;
+ int index;
+
+ /* User mode accesses just cause a SIGSEGV */
+ if (user_mode(regs))
+ return do_trap(regs, SIGSEGV, code, addr);
+
+ /*
+ * Synchronize this task's top level page-table
+ * with the 'reference' page table.
+ *
+ * Do _not_ use "tsk->active_mm->pgd" here.
+ * We might be inside an interrupt in the middle
+ * of a task switch.
+ */
+ index = pgd_index(addr);
+ pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index;
+ pgd_k = init_mm.pgd + index;
+
+ if (!pgd_present(*pgd_k)) {
+ no_context(regs, addr);
+ return;
+ }
+ set_pgd(pgd, *pgd_k);
+
+ p4d = p4d_offset(pgd, addr);
+ p4d_k = p4d_offset(pgd_k, addr);
+ if (!p4d_present(*p4d_k)) {
+ no_context(regs, addr);
+ return;
+ }
+
+ pud = pud_offset(p4d, addr);
+ pud_k = pud_offset(p4d_k, addr);
+ if (!pud_present(*pud_k)) {
+ no_context(regs, addr);
+ return;
+ }
+
+ /*
+ * Since the vmalloc area is global, it is unnecessary
+ * to copy individual PTEs
+ */
+ pmd = pmd_offset(pud, addr);
+ pmd_k = pmd_offset(pud_k, addr);
+ if (!pmd_present(*pmd_k)) {
+ no_context(regs, addr);
+ return;
+ }
+ set_pmd(pmd, *pmd_k);
+
+ /*
+ * Make sure the actual PTE exists as well to
+ * catch kernel vmalloc-area accesses to non-mapped
+ * addresses. If we don't do this, this will just
+ * silently loop forever.
+ */
+ pte_k = pte_offset_kernel(pmd_k, addr);
+ if (!pte_present(*pte_k)) {
+ no_context(regs, addr);
+ return;
+ }
+
+ /*
+ * The kernel assumes that TLBs don't cache invalid
+ * entries, but in RISC-V, SFENCE.VMA specifies an
+ * ordering constraint, not a cache flush; it is
+ * necessary even after writing invalid entries.
+ */
+ local_flush_tlb_page(addr);
+}
+
+static inline bool access_error(unsigned long cause, struct vm_area_struct *vma)
+{
+ switch (cause) {
+ case EXC_INST_PAGE_FAULT:
+ if (!(vma->vm_flags & VM_EXEC)) {
+ return true;
+ }
+ break;
+ case EXC_LOAD_PAGE_FAULT:
+ if (!(vma->vm_flags & VM_READ)) {
+ return true;
+ }
+ break;
+ case EXC_STORE_PAGE_FAULT:
+ if (!(vma->vm_flags & VM_WRITE)) {
+ return true;
+ }
+ break;
+ default:
+ panic("%s: unhandled cause %lu", __func__, cause);
+ }
+ return false;
+}
+
/*
* This routine handles page faults. It determines the address and the
* problem, and then passes it off to one of the appropriate routines.
@@ -48,8 +209,10 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
* only copy the information from the master page table,
* nothing more.
*/
- if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END)))
- goto vmalloc_fault;
+ if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) {
+ vmalloc_fault(regs, code, addr);
+ return;
+ }
/* Enable interrupts if they were enabled in the parent context. */
if (likely(regs->status & SR_PIE))
@@ -59,25 +222,37 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
* If we're in an interrupt, have no user context, or are running
* in an atomic region, then we must not take the fault.
*/
- if (unlikely(faulthandler_disabled() || !mm))
- goto no_context;
+ if (unlikely(faulthandler_disabled() || !mm)) {
+ no_context(regs, addr);
+ return;
+ }
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+ if (cause == EXC_STORE_PAGE_FAULT)
+ flags |= FAULT_FLAG_WRITE;
+ else if (cause == EXC_INST_PAGE_FAULT)
+ flags |= FAULT_FLAG_INSTRUCTION;
retry:
mmap_read_lock(mm);
vma = find_vma(mm, addr);
- if (unlikely(!vma))
- goto bad_area;
+ if (unlikely(!vma)) {
+ bad_area(regs, mm, code, addr);
+ return;
+ }
if (likely(vma->vm_start <= addr))
goto good_area;
- if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
- goto bad_area;
- if (unlikely(expand_stack(vma, addr)))
- goto bad_area;
+ if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
+ bad_area(regs, mm, code, addr);
+ return;
+ }
+ if (unlikely(expand_stack(vma, addr))) {
+ bad_area(regs, mm, code, addr);
+ return;
+ }
/*
* Ok, we have a good vm_area for this memory access, so
@@ -86,22 +261,9 @@ retry:
good_area:
code = SEGV_ACCERR;
- switch (cause) {
- case EXC_INST_PAGE_FAULT:
- if (!(vma->vm_flags & VM_EXEC))
- goto bad_area;
- break;
- case EXC_LOAD_PAGE_FAULT:
- if (!(vma->vm_flags & VM_READ))
- goto bad_area;
- break;
- case EXC_STORE_PAGE_FAULT:
- if (!(vma->vm_flags & VM_WRITE))
- goto bad_area;
- flags |= FAULT_FLAG_WRITE;
- break;
- default:
- panic("%s: unhandled cause %lu", __func__, cause);
+ if (unlikely(access_error(cause, vma))) {
+ bad_area(regs, mm, code, addr);
+ return;
}
/*
@@ -119,144 +281,22 @@ good_area:
if (fault_signal_pending(fault, regs))
return;
- if (unlikely(fault & VM_FAULT_ERROR)) {
- if (fault & VM_FAULT_OOM)
- goto out_of_memory;
- else if (fault & VM_FAULT_SIGBUS)
- goto do_sigbus;
- BUG();
- }
-
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_RETRY) {
- flags |= FAULT_FLAG_TRIED;
+ if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) {
+ flags |= FAULT_FLAG_TRIED;
- /*
- * No need to mmap_read_unlock(mm) as we would
- * have already released it in __lock_page_or_retry
- * in mm/filemap.c.
- */
- goto retry;
- }
+ /*
+ * No need to mmap_read_unlock(mm) as we would
+ * have already released it in __lock_page_or_retry
+ * in mm/filemap.c.
+ */
+ goto retry;
}
mmap_read_unlock(mm);
- return;
- /*
- * Something tried to access memory that isn't in our memory map.
- * Fix it, but check if it's kernel or user first.
- */
-bad_area:
- mmap_read_unlock(mm);
- /* User mode accesses just cause a SIGSEGV */
- if (user_mode(regs)) {
- do_trap(regs, SIGSEGV, code, addr);
+ if (unlikely(fault & VM_FAULT_ERROR)) {
+ mm_fault_error(regs, addr, fault);
return;
}
-
-no_context:
- /* Are we prepared to handle this kernel fault? */
- if (fixup_exception(regs))
- return;
-
- /*
- * Oops. The kernel tried to access some bad page. We'll have to
- * terminate things with extreme prejudice.
- */
- bust_spinlocks(1);
- pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n",
- (addr < PAGE_SIZE) ? "NULL pointer dereference" :
- "paging request", addr);
- die(regs, "Oops");
- do_exit(SIGKILL);
-
- /*
- * We ran out of memory, call the OOM killer, and return the userspace
- * (which will retry the fault, or kill us if we got oom-killed).
- */
-out_of_memory:
- mmap_read_unlock(mm);
- if (!user_mode(regs))
- goto no_context;
- pagefault_out_of_memory();
- return;
-
-do_sigbus:
- mmap_read_unlock(mm);
- /* Kernel mode? Handle exceptions or die */
- if (!user_mode(regs))
- goto no_context;
- do_trap(regs, SIGBUS, BUS_ADRERR, addr);
return;
-
-vmalloc_fault:
- {
- pgd_t *pgd, *pgd_k;
- pud_t *pud, *pud_k;
- p4d_t *p4d, *p4d_k;
- pmd_t *pmd, *pmd_k;
- pte_t *pte_k;
- int index;
-
- /* User mode accesses just cause a SIGSEGV */
- if (user_mode(regs))
- return do_trap(regs, SIGSEGV, code, addr);
-
- /*
- * Synchronize this task's top level page-table
- * with the 'reference' page table.
- *
- * Do _not_ use "tsk->active_mm->pgd" here.
- * We might be inside an interrupt in the middle
- * of a task switch.
- */
- index = pgd_index(addr);
- pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index;
- pgd_k = init_mm.pgd + index;
-
- if (!pgd_present(*pgd_k))
- goto no_context;
- set_pgd(pgd, *pgd_k);
-
- p4d = p4d_offset(pgd, addr);
- p4d_k = p4d_offset(pgd_k, addr);
- if (!p4d_present(*p4d_k))
- goto no_context;
-
- pud = pud_offset(p4d, addr);
- pud_k = pud_offset(p4d_k, addr);
- if (!pud_present(*pud_k))
- goto no_context;
-
- /*
- * Since the vmalloc area is global, it is unnecessary
- * to copy individual PTEs
- */
- pmd = pmd_offset(pud, addr);
- pmd_k = pmd_offset(pud_k, addr);
- if (!pmd_present(*pmd_k))
- goto no_context;
- set_pmd(pmd, *pmd_k);
-
- /*
- * Make sure the actual PTE exists as well to
- * catch kernel vmalloc-area accesses to non-mapped
- * addresses. If we don't do this, this will just
- * silently loop forever.
- */
- pte_k = pte_offset_kernel(pmd_k, addr);
- if (!pte_present(*pte_k))
- goto no_context;
-
- /*
- * The kernel assumes that TLBs don't cache invalid
- * entries, but in RISC-V, SFENCE.VMA specifies an
- * ordering constraint, not a cache flush; it is
- * necessary even after writing invalid entries.
- */
- local_flush_tlb_page(addr);
-
- return;
- }
}
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 1dc89303b679..ea933b789a88 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -28,7 +28,18 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
EXPORT_SYMBOL(empty_zero_page);
extern char _start[];
-void *dtb_early_va;
+#define DTB_EARLY_BASE_VA PGDIR_SIZE
+void *dtb_early_va __initdata;
+uintptr_t dtb_early_pa __initdata;
+
+struct pt_alloc_ops {
+ pte_t *(*get_pte_virt)(phys_addr_t pa);
+ phys_addr_t (*alloc_pte)(uintptr_t va);
+#ifndef __PAGETABLE_PMD_FOLDED
+ pmd_t *(*get_pmd_virt)(phys_addr_t pa);
+ phys_addr_t (*alloc_pmd)(uintptr_t va);
+#endif
+};
static void __init zone_sizes_init(void)
{
@@ -141,8 +152,6 @@ disable:
}
#endif /* CONFIG_BLK_DEV_INITRD */
-static phys_addr_t dtb_early_pa __initdata;
-
void __init setup_bootmem(void)
{
phys_addr_t mem_size = 0;
@@ -194,6 +203,8 @@ void __init setup_bootmem(void)
}
#ifdef CONFIG_MMU
+static struct pt_alloc_ops pt_ops;
+
unsigned long va_pa_offset;
EXPORT_SYMBOL(va_pa_offset);
unsigned long pfn_base;
@@ -202,7 +213,6 @@ EXPORT_SYMBOL(pfn_base);
pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
-static bool mmu_enabled;
#define MAX_EARLY_MAPPING_SIZE SZ_128M
@@ -224,27 +234,46 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
local_flush_tlb_page(addr);
}
-static pte_t *__init get_pte_virt(phys_addr_t pa)
+static inline pte_t *__init get_pte_virt_early(phys_addr_t pa)
{
- if (mmu_enabled) {
- clear_fixmap(FIX_PTE);
- return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
- } else {
- return (pte_t *)((uintptr_t)pa);
- }
+ return (pte_t *)((uintptr_t)pa);
}
-static phys_addr_t __init alloc_pte(uintptr_t va)
+static inline pte_t *__init get_pte_virt_fixmap(phys_addr_t pa)
+{
+ clear_fixmap(FIX_PTE);
+ return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
+}
+
+static inline pte_t *get_pte_virt_late(phys_addr_t pa)
+{
+ return (pte_t *) __va(pa);
+}
+
+static inline phys_addr_t __init alloc_pte_early(uintptr_t va)
{
/*
* We only create PMD or PGD early mappings so we
* should never reach here with MMU disabled.
*/
- BUG_ON(!mmu_enabled);
+ BUG();
+}
+static inline phys_addr_t __init alloc_pte_fixmap(uintptr_t va)
+{
return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}
+static phys_addr_t alloc_pte_late(uintptr_t va)
+{
+ unsigned long vaddr;
+
+ vaddr = __get_free_page(GFP_KERNEL);
+ if (!vaddr || !pgtable_pte_page_ctor(virt_to_page(vaddr)))
+ BUG();
+ return __pa(vaddr);
+}
+
static void __init create_pte_mapping(pte_t *ptep,
uintptr_t va, phys_addr_t pa,
phys_addr_t sz, pgprot_t prot)
@@ -269,28 +298,46 @@ pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
#endif
pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE);
-static pmd_t *__init get_pmd_virt(phys_addr_t pa)
+static pmd_t *__init get_pmd_virt_early(phys_addr_t pa)
{
- if (mmu_enabled) {
- clear_fixmap(FIX_PMD);
- return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
- } else {
- return (pmd_t *)((uintptr_t)pa);
- }
+ /* Before MMU is enabled */
+ return (pmd_t *)((uintptr_t)pa);
}
-static phys_addr_t __init alloc_pmd(uintptr_t va)
+static pmd_t *__init get_pmd_virt_fixmap(phys_addr_t pa)
{
- uintptr_t pmd_num;
+ clear_fixmap(FIX_PMD);
+ return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
+}
- if (mmu_enabled)
- return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+static pmd_t *get_pmd_virt_late(phys_addr_t pa)
+{
+ return (pmd_t *) __va(pa);
+}
+
+static phys_addr_t __init alloc_pmd_early(uintptr_t va)
+{
+ uintptr_t pmd_num;
pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT;
BUG_ON(pmd_num >= NUM_EARLY_PMDS);
return (uintptr_t)&early_pmd[pmd_num * PTRS_PER_PMD];
}
+static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va)
+{
+ return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+}
+
+static phys_addr_t alloc_pmd_late(uintptr_t va)
+{
+ unsigned long vaddr;
+
+ vaddr = __get_free_page(GFP_KERNEL);
+ BUG_ON(!vaddr);
+ return __pa(vaddr);
+}
+
static void __init create_pmd_mapping(pmd_t *pmdp,
uintptr_t va, phys_addr_t pa,
phys_addr_t sz, pgprot_t prot)
@@ -306,34 +353,34 @@ static void __init create_pmd_mapping(pmd_t *pmdp,
}
if (pmd_none(pmdp[pmd_idx])) {
- pte_phys = alloc_pte(va);
+ pte_phys = pt_ops.alloc_pte(va);
pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE);
- ptep = get_pte_virt(pte_phys);
+ ptep = pt_ops.get_pte_virt(pte_phys);
memset(ptep, 0, PAGE_SIZE);
} else {
pte_phys = PFN_PHYS(_pmd_pfn(pmdp[pmd_idx]));
- ptep = get_pte_virt(pte_phys);
+ ptep = pt_ops.get_pte_virt(pte_phys);
}
create_pte_mapping(ptep, va, pa, sz, prot);
}
#define pgd_next_t pmd_t
-#define alloc_pgd_next(__va) alloc_pmd(__va)
-#define get_pgd_next_virt(__pa) get_pmd_virt(__pa)
+#define alloc_pgd_next(__va) pt_ops.alloc_pmd(__va)
+#define get_pgd_next_virt(__pa) pt_ops.get_pmd_virt(__pa)
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
create_pmd_mapping(__nextp, __va, __pa, __sz, __prot)
#define fixmap_pgd_next fixmap_pmd
#else
#define pgd_next_t pte_t
-#define alloc_pgd_next(__va) alloc_pte(__va)
-#define get_pgd_next_virt(__pa) get_pte_virt(__pa)
+#define alloc_pgd_next(__va) pt_ops.alloc_pte(__va)
+#define get_pgd_next_virt(__pa) pt_ops.get_pte_virt(__pa)
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
#define fixmap_pgd_next fixmap_pte
#endif
-static void __init create_pgd_mapping(pgd_t *pgdp,
+void __init create_pgd_mapping(pgd_t *pgdp,
uintptr_t va, phys_addr_t pa,
phys_addr_t sz, pgprot_t prot)
{
@@ -389,10 +436,13 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
- uintptr_t va, end_va;
+ uintptr_t va, pa, end_va;
uintptr_t load_pa = (uintptr_t)(&_start);
uintptr_t load_sz = (uintptr_t)(&_end) - load_pa;
uintptr_t map_size = best_map_size(load_pa, MAX_EARLY_MAPPING_SIZE);
+#ifndef __PAGETABLE_PMD_FOLDED
+ pmd_t fix_bmap_spmd, fix_bmap_epmd;
+#endif
va_pa_offset = PAGE_OFFSET - load_pa;
pfn_base = PFN_DOWN(load_pa);
@@ -408,6 +458,12 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
BUG_ON((load_pa % map_size) != 0);
BUG_ON(load_sz > MAX_EARLY_MAPPING_SIZE);
+ pt_ops.alloc_pte = alloc_pte_early;
+ pt_ops.get_pte_virt = get_pte_virt_early;
+#ifndef __PAGETABLE_PMD_FOLDED
+ pt_ops.alloc_pmd = alloc_pmd_early;
+ pt_ops.get_pmd_virt = get_pmd_virt_early;
+#endif
/* Setup early PGD for fixmap */
create_pgd_mapping(early_pg_dir, FIXADDR_START,
(uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
@@ -438,17 +494,44 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
load_pa + (va - PAGE_OFFSET),
map_size, PAGE_KERNEL_EXEC);
- /* Create fixed mapping for early FDT parsing */
- end_va = __fix_to_virt(FIX_FDT) + FIX_FDT_SIZE;
- for (va = __fix_to_virt(FIX_FDT); va < end_va; va += PAGE_SIZE)
- create_pte_mapping(fixmap_pte, va,
- dtb_pa + (va - __fix_to_virt(FIX_FDT)),
- PAGE_SIZE, PAGE_KERNEL);
-
- /* Save pointer to DTB for early FDT parsing */
- dtb_early_va = (void *)fix_to_virt(FIX_FDT) + (dtb_pa & ~PAGE_MASK);
- /* Save physical address for memblock reservation */
+ /* Create two consecutive PGD mappings for FDT early scan */
+ pa = dtb_pa & ~(PGDIR_SIZE - 1);
+ create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
+ pa, PGDIR_SIZE, PAGE_KERNEL);
+ create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA + PGDIR_SIZE,
+ pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL);
+ dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1));
dtb_early_pa = dtb_pa;
+
+ /*
+ * Bootime fixmap only can handle PMD_SIZE mapping. Thus, boot-ioremap
+ * range can not span multiple pmds.
+ */
+ BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
+ != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
+
+#ifndef __PAGETABLE_PMD_FOLDED
+ /*
+ * Early ioremap fixmap is already created as it lies within first 2MB
+ * of fixmap region. We always map PMD_SIZE. Thus, both FIX_BTMAP_END
+ * FIX_BTMAP_BEGIN should lie in the same pmd. Verify that and warn
+ * the user if not.
+ */
+ fix_bmap_spmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_BEGIN))];
+ fix_bmap_epmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_END))];
+ if (pmd_val(fix_bmap_spmd) != pmd_val(fix_bmap_epmd)) {
+ WARN_ON(1);
+ pr_warn("fixmap btmap start [%08lx] != end [%08lx]\n",
+ pmd_val(fix_bmap_spmd), pmd_val(fix_bmap_epmd));
+ pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
+ fix_to_virt(FIX_BTMAP_BEGIN));
+ pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n",
+ fix_to_virt(FIX_BTMAP_END));
+
+ pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
+ pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN);
+ }
+#endif
}
static void __init setup_vm_final(void)
@@ -457,9 +540,16 @@ static void __init setup_vm_final(void)
phys_addr_t pa, start, end;
u64 i;
- /* Set mmu_enabled flag */
- mmu_enabled = true;
-
+ /**
+ * MMU is enabled at this point. But page table setup is not complete yet.
+ * fixmap page table alloc functions should be used at this point
+ */
+ pt_ops.alloc_pte = alloc_pte_fixmap;
+ pt_ops.get_pte_virt = get_pte_virt_fixmap;
+#ifndef __PAGETABLE_PMD_FOLDED
+ pt_ops.alloc_pmd = alloc_pmd_fixmap;
+ pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
+#endif
/* Setup swapper PGD for fixmap */
create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
__pa_symbol(fixmap_pgd_next),
@@ -488,6 +578,14 @@ static void __init setup_vm_final(void)
/* Move to swapper page table */
csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
local_flush_tlb_all();
+
+ /* generic page allocation functions must be used to setup page table */
+ pt_ops.alloc_pte = alloc_pte_late;
+ pt_ops.get_pte_virt = get_pte_virt_late;
+#ifndef __PAGETABLE_PMD_FOLDED
+ pt_ops.alloc_pmd = alloc_pmd_late;
+ pt_ops.get_pmd_virt = get_pmd_virt_late;
+#endif
}
#else
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c
index 0831c2e61a8f..ace74dec7492 100644
--- a/arch/riscv/mm/ptdump.c
+++ b/arch/riscv/mm/ptdump.c
@@ -3,6 +3,7 @@
* Copyright (C) 2019 SiFive
*/
+#include <linux/efi.h>
#include <linux/init.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
@@ -49,6 +50,14 @@ struct addr_marker {
const char *name;
};
+/* Private information for debugfs */
+struct ptd_mm_info {
+ struct mm_struct *mm;
+ const struct addr_marker *markers;
+ unsigned long base_addr;
+ unsigned long end;
+};
+
static struct addr_marker address_markers[] = {
#ifdef CONFIG_KASAN
{KASAN_SHADOW_START, "Kasan shadow start"},
@@ -68,6 +77,28 @@ static struct addr_marker address_markers[] = {
{-1, NULL},
};
+static struct ptd_mm_info kernel_ptd_info = {
+ .mm = &init_mm,
+ .markers = address_markers,
+ .base_addr = KERN_VIRT_START,
+ .end = ULONG_MAX,
+};
+
+#ifdef CONFIG_EFI
+static struct addr_marker efi_addr_markers[] = {
+ { 0, "UEFI runtime start" },
+ { SZ_1G, "UEFI runtime end" },
+ { -1, NULL }
+};
+
+static struct ptd_mm_info efi_ptd_info = {
+ .mm = &efi_mm,
+ .markers = efi_addr_markers,
+ .base_addr = 0,
+ .end = SZ_2G,
+};
+#endif
+
/* Page Table Entry */
struct prot_bits {
u64 mask;
@@ -245,22 +276,22 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr,
}
}
-static void ptdump_walk(struct seq_file *s)
+static void ptdump_walk(struct seq_file *s, struct ptd_mm_info *pinfo)
{
struct pg_state st = {
.seq = s,
- .marker = address_markers,
+ .marker = pinfo->markers,
.level = -1,
.ptdump = {
.note_page = note_page,
.range = (struct ptdump_range[]) {
- {KERN_VIRT_START, ULONG_MAX},
+ {pinfo->base_addr, pinfo->end},
{0, 0}
}
}
};
- ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
+ ptdump_walk_pgd(&st.ptdump, pinfo->mm, NULL);
}
void ptdump_check_wx(void)
@@ -293,7 +324,7 @@ void ptdump_check_wx(void)
static int ptdump_show(struct seq_file *m, void *v)
{
- ptdump_walk(m);
+ ptdump_walk(m, m->private);
return 0;
}
@@ -308,8 +339,13 @@ static int ptdump_init(void)
for (j = 0; j < ARRAY_SIZE(pte_bits); j++)
pg_level[i].mask |= pte_bits[j].mask;
- debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
+ debugfs_create_file("kernel_page_tables", 0400, NULL, &kernel_ptd_info,
&ptdump_fops);
+#ifdef CONFIG_EFI
+ if (efi_enabled(EFI_RUNTIME_SERVICES))
+ debugfs_create_file("efi_page_tables", 0400, NULL, &efi_ptd_info,
+ &ptdump_fops);
+#endif
return 0;
}
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index d509bf23ef78..4a00351dec89 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -60,6 +60,7 @@ config S390
def_bool y
select ARCH_BINFMT_ELF_STATE
select ARCH_HAS_DEBUG_VM_PGTABLE
+ select ARCH_HAS_DEBUG_WX
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
@@ -73,6 +74,7 @@ config S390
select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_SYSCALL_WRAPPER
select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARCH_HAS_VDSO_DATA
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_INLINE_READ_LOCK
select ARCH_INLINE_READ_LOCK_BH
@@ -118,6 +120,8 @@ config S390
select GENERIC_CPU_AUTOPROBE
select GENERIC_CPU_VULNERABILITIES
select GENERIC_FIND_FIRST_BIT
+ select GENERIC_GETTIMEOFDAY
+ select GENERIC_PTDUMP
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
@@ -149,6 +153,7 @@ config S390
select HAVE_FUNCTION_TRACER
select HAVE_FUTEX_CMPXCHG if FUTEX
select HAVE_GCC_PLUGINS
+ select HAVE_GENERIC_VDSO
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZ4
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
index 761fe2b0b2f6..ab48b694ade8 100644
--- a/arch/s390/Kconfig.debug
+++ b/arch/s390/Kconfig.debug
@@ -3,17 +3,5 @@
config TRACE_IRQFLAGS_SUPPORT
def_bool y
-config S390_PTDUMP
- bool "Export kernel pagetable layout to userspace via debugfs"
- depends on DEBUG_KERNEL
- select DEBUG_FS
- help
- Say Y here if you want to show the kernel pagetable layout in a
- debugfs file. This information is only useful for kernel developers
- who are working in architecture specific areas of the kernel.
- It is probably not a good idea to enable this feature in a production
- kernel.
- If in doubt, say "N"
-
config EARLY_PRINTK
def_bool y
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index 45b33b83de08..41a64b8dce25 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -73,7 +73,3 @@ $(obj)/startup.a: $(OBJECTS) FORCE
install:
sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \
System.map "$(INSTALL_PATH)"
-
-chkbss := $(obj-y)
-chkbss-target := startup.a
-include $(srctree)/arch/s390/scripts/Makefile.chkbss
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index fa529c5b4486..b235ed95a3d8 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -62,7 +62,3 @@ $(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE
OBJCOPYFLAGS_piggy.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.bin.compressed
$(obj)/piggy.o: $(obj)/vmlinux.bin$(suffix-y) FORCE
$(call if_changed,objcopy)
-
-chkbss := $(filter-out piggy.o info.o, $(obj-y))
-chkbss-target := vmlinux.bin
-include $(srctree)/arch/s390/scripts/Makefile.chkbss
diff --git a/arch/s390/boot/compressed/decompressor.c b/arch/s390/boot/compressed/decompressor.c
index 368fd372c875..3061b11c4d27 100644
--- a/arch/s390/boot/compressed/decompressor.c
+++ b/arch/s390/boot/compressed/decompressor.c
@@ -16,7 +16,6 @@
* gzip declarations
*/
#define STATIC static
-#define STATIC_RW_DATA static __section(.data)
#undef memset
#undef memcpy
diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S
index 44561b2c3712..9427e2cd0c15 100644
--- a/arch/s390/boot/compressed/vmlinux.lds.S
+++ b/arch/s390/boot/compressed/vmlinux.lds.S
@@ -59,6 +59,19 @@ SECTIONS
BOOT_DATA_PRESERVED
/*
+ * This is the BSS section of the decompressor and not of the decompressed Linux kernel.
+ * It will consume place in the decompressor's image.
+ */
+ . = ALIGN(8);
+ .bss : {
+ _bss = . ;
+ *(.bss)
+ *(.bss.*)
+ *(COMMON)
+ _ebss = .;
+ }
+
+ /*
* uncompressed image info used by the decompressor it should match
* struct vmlinux_info. It comes from .vmlinux.info section of
* uncompressed vmlinux in a form of info.o
@@ -81,15 +94,6 @@ SECTIONS
FILL(0xff);
. = ALIGN(4096);
}
- . = ALIGN(256);
- .bss : {
- _bss = . ;
- *(.bss)
- *(.bss.*)
- *(COMMON)
- . = ALIGN(8); /* For convenience during zeroing */
- _ebss = .;
- }
_end = .;
/* Sections to be discarded */
diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S
index dae10961d072..1a2c2b1ed964 100644
--- a/arch/s390/boot/head.S
+++ b/arch/s390/boot/head.S
@@ -360,22 +360,23 @@ ENTRY(startup_kdump)
# the save area and does disabled wait with a faulty address.
#
ENTRY(startup_pgm_check_handler)
- stmg %r0,%r15,__LC_SAVE_AREA_SYNC
- la %r1,4095
- stctg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r1)
- mvc __LC_GPREGS_SAVE_AREA-4095(128,%r1),__LC_SAVE_AREA_SYNC
- mvc __LC_PSW_SAVE_AREA-4095(16,%r1),__LC_PGM_OLD_PSW
+ stmg %r8,%r15,__LC_SAVE_AREA_SYNC
+ la %r8,4095
+ stctg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r8)
+ stmg %r0,%r7,__LC_GPREGS_SAVE_AREA-4095(%r8)
+ mvc __LC_GPREGS_SAVE_AREA-4095+64(64,%r8),__LC_SAVE_AREA_SYNC
+ mvc __LC_PSW_SAVE_AREA-4095(16,%r8),__LC_PGM_OLD_PSW
mvc __LC_RETURN_PSW(16),__LC_PGM_OLD_PSW
ni __LC_RETURN_PSW,0xfc # remove IO and EX bits
ni __LC_RETURN_PSW+1,0xfb # remove MCHK bit
oi __LC_RETURN_PSW+1,0x2 # set wait state bit
- larl %r2,.Lold_psw_disabled_wait
- stg %r2,__LC_PGM_NEW_PSW+8
- l %r15,.Ldump_info_stack-.Lold_psw_disabled_wait(%r2)
+ larl %r9,.Lold_psw_disabled_wait
+ stg %r9,__LC_PGM_NEW_PSW+8
+ l %r15,.Ldump_info_stack-.Lold_psw_disabled_wait(%r9)
brasl %r14,print_pgm_check_info
.Lold_psw_disabled_wait:
- la %r1,4095
- lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)
+ la %r8,4095
+ lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r8)
lpswe __LC_RETURN_PSW # disabled wait
.Ldump_info_stack:
.long 0x5000 + PAGE_SIZE - STACK_FRAME_OVERHEAD
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
index 8e222a666025..f94b91d72620 100644
--- a/arch/s390/boot/ipl_parm.c
+++ b/arch/s390/boot/ipl_parm.c
@@ -21,7 +21,7 @@ unsigned long __bootdata(memory_end);
int __bootdata(memory_end_set);
int __bootdata(noexec_disabled);
-int kaslr_enabled __section(.data);
+int kaslr_enabled;
static inline int __diag308(unsigned long subcode, void *addr)
{
@@ -70,30 +70,44 @@ static size_t scpdata_length(const u8 *buf, size_t count)
static size_t ipl_block_get_ascii_scpdata(char *dest, size_t size,
const struct ipl_parameter_block *ipb)
{
- size_t count;
- size_t i;
+ const __u8 *scp_data;
+ __u32 scp_data_len;
int has_lowercase;
+ size_t count = 0;
+ size_t i;
+
+ switch (ipb->pb0_hdr.pbt) {
+ case IPL_PBT_FCP:
+ scp_data_len = ipb->fcp.scp_data_len;
+ scp_data = ipb->fcp.scp_data;
+ break;
+ case IPL_PBT_NVME:
+ scp_data_len = ipb->nvme.scp_data_len;
+ scp_data = ipb->nvme.scp_data;
+ break;
+ default:
+ goto out;
+ }
- count = min(size - 1, scpdata_length(ipb->fcp.scp_data,
- ipb->fcp.scp_data_len));
+ count = min(size - 1, scpdata_length(scp_data, scp_data_len));
if (!count)
goto out;
has_lowercase = 0;
for (i = 0; i < count; i++) {
- if (!isascii(ipb->fcp.scp_data[i])) {
+ if (!isascii(scp_data[i])) {
count = 0;
goto out;
}
- if (!has_lowercase && islower(ipb->fcp.scp_data[i]))
+ if (!has_lowercase && islower(scp_data[i]))
has_lowercase = 1;
}
if (has_lowercase)
- memcpy(dest, ipb->fcp.scp_data, count);
+ memcpy(dest, scp_data, count);
else
for (i = 0; i < count; i++)
- dest[i] = tolower(ipb->fcp.scp_data[i]);
+ dest[i] = tolower(scp_data[i]);
out:
dest[count] = '\0';
return count;
@@ -115,6 +129,7 @@ static void append_ipl_block_parm(void)
parm, COMMAND_LINE_SIZE - len - 1, &ipl_block);
break;
case IPL_PBT_FCP:
+ case IPL_PBT_NVME:
rc = ipl_block_get_ascii_scpdata(
parm, COMMAND_LINE_SIZE - len - 1, &ipl_block);
break;
@@ -209,7 +224,7 @@ static void modify_fac_list(char *str)
check_cleared_facilities();
}
-static char command_line_buf[COMMAND_LINE_SIZE] __section(.data);
+static char command_line_buf[COMMAND_LINE_SIZE];
void parse_boot_command_line(void)
{
char *param, *val;
@@ -230,7 +245,7 @@ void parse_boot_command_line(void)
if (!strcmp(param, "vmalloc") && val)
vmalloc_size = round_up(memparse(val, NULL), PAGE_SIZE);
- if (!strcmp(param, "dfltcc")) {
+ if (!strcmp(param, "dfltcc") && val) {
if (!strcmp(val, "off"))
zlib_dfltcc_support = ZLIB_DFLTCC_DISABLED;
else if (!strcmp(val, "on"))
@@ -254,17 +269,34 @@ void parse_boot_command_line(void)
if (!strcmp(param, "nokaslr"))
kaslr_enabled = 0;
+
+#if IS_ENABLED(CONFIG_KVM)
+ if (!strcmp(param, "prot_virt")) {
+ rc = kstrtobool(val, &enabled);
+ if (!rc && enabled)
+ prot_virt_host = 1;
+ }
+#endif
}
}
+static inline bool is_ipl_block_dump(void)
+{
+ if (ipl_block.pb0_hdr.pbt == IPL_PBT_FCP &&
+ ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP)
+ return true;
+ if (ipl_block.pb0_hdr.pbt == IPL_PBT_NVME &&
+ ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP)
+ return true;
+ return false;
+}
+
void setup_memory_end(void)
{
#ifdef CONFIG_CRASH_DUMP
if (OLDMEM_BASE) {
kaslr_enabled = 0;
- } else if (ipl_block_valid &&
- ipl_block.pb0_hdr.pbt == IPL_PBT_FCP &&
- ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) {
+ } else if (ipl_block_valid && is_ipl_block_dump()) {
kaslr_enabled = 0;
if (!sclp_early_get_hsa_size(&memory_end) && memory_end)
memory_end_set = 1;
diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c
index d4442163ffa9..d844a5ef9089 100644
--- a/arch/s390/boot/kaslr.c
+++ b/arch/s390/boot/kaslr.c
@@ -42,7 +42,7 @@ static int check_prng(void)
return PRNG_MODE_TDES;
}
-static unsigned long get_random(unsigned long limit)
+static int get_random(unsigned long limit, unsigned long *value)
{
struct prng_parm prng = {
/* initial parameter block for tdes mode, copied from libica */
@@ -84,19 +84,101 @@ static unsigned long get_random(unsigned long limit)
(u8 *) &random, sizeof(random));
break;
default:
- random = 0;
+ return -1;
}
- return random % limit;
+ *value = random % limit;
+ return 0;
+}
+
+/*
+ * To randomize kernel base address we have to consider several facts:
+ * 1. physical online memory might not be continuous and have holes. mem_detect
+ * info contains list of online memory ranges we should consider.
+ * 2. we have several memory regions which are occupied and we should not
+ * overlap and destroy them. Currently safe_addr tells us the border below
+ * which all those occupied regions are. We are safe to use anything above
+ * safe_addr.
+ * 3. the upper limit might apply as well, even if memory above that limit is
+ * online. Currently those limitations are:
+ * 3.1. Limit set by "mem=" kernel command line option
+ * 3.2. memory reserved at the end for kasan initialization.
+ * 4. kernel base address must be aligned to THREAD_SIZE (kernel stack size).
+ * Which is required for CONFIG_CHECK_STACK. Currently THREAD_SIZE is 4 pages
+ * (16 pages when the kernel is built with kasan enabled)
+ * Assumptions:
+ * 1. kernel size (including .bss size) and upper memory limit are page aligned.
+ * 2. mem_detect memory region start is THREAD_SIZE aligned / end is PAGE_SIZE
+ * aligned (in practice memory configurations granularity on z/VM and LPAR
+ * is 1mb).
+ *
+ * To guarantee uniform distribution of kernel base address among all suitable
+ * addresses we generate random value just once. For that we need to build a
+ * continuous range in which every value would be suitable. We can build this
+ * range by simply counting all suitable addresses (let's call them positions)
+ * which would be valid as kernel base address. To count positions we iterate
+ * over online memory ranges. For each range which is big enough for the
+ * kernel image we count all suitable addresses we can put the kernel image at
+ * that is
+ * (end - start - kernel_size) / THREAD_SIZE + 1
+ * Two functions count_valid_kernel_positions and position_to_address help
+ * to count positions in memory range given and then convert position back
+ * to address.
+ */
+static unsigned long count_valid_kernel_positions(unsigned long kernel_size,
+ unsigned long _min,
+ unsigned long _max)
+{
+ unsigned long start, end, pos = 0;
+ int i;
+
+ for_each_mem_detect_block(i, &start, &end) {
+ if (_min >= end)
+ continue;
+ if (start >= _max)
+ break;
+ start = max(_min, start);
+ end = min(_max, end);
+ if (end - start < kernel_size)
+ continue;
+ pos += (end - start - kernel_size) / THREAD_SIZE + 1;
+ }
+
+ return pos;
+}
+
+static unsigned long position_to_address(unsigned long pos, unsigned long kernel_size,
+ unsigned long _min, unsigned long _max)
+{
+ unsigned long start, end;
+ int i;
+
+ for_each_mem_detect_block(i, &start, &end) {
+ if (_min >= end)
+ continue;
+ if (start >= _max)
+ break;
+ start = max(_min, start);
+ end = min(_max, end);
+ if (end - start < kernel_size)
+ continue;
+ if ((end - start - kernel_size) / THREAD_SIZE + 1 >= pos)
+ return start + (pos - 1) * THREAD_SIZE;
+ pos -= (end - start - kernel_size) / THREAD_SIZE + 1;
+ }
+
+ return 0;
}
unsigned long get_random_base(unsigned long safe_addr)
{
- unsigned long memory_limit = memory_end_set ? memory_end : 0;
- unsigned long base, start, end, kernel_size;
- unsigned long block_sum, offset;
+ unsigned long memory_limit = get_mem_detect_end();
+ unsigned long base_pos, max_pos, kernel_size;
unsigned long kasan_needs;
int i;
+ if (memory_end_set)
+ memory_limit = min(memory_limit, memory_end);
+
if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE) {
if (safe_addr < INITRD_START + INITRD_SIZE)
safe_addr = INITRD_START + INITRD_SIZE;
@@ -126,45 +208,17 @@ unsigned long get_random_base(unsigned long safe_addr)
}
kernel_size = vmlinux.image_size + vmlinux.bss_size;
- block_sum = 0;
- for_each_mem_detect_block(i, &start, &end) {
- if (memory_limit) {
- if (start >= memory_limit)
- break;
- if (end > memory_limit)
- end = memory_limit;
- }
- if (end - start < kernel_size)
- continue;
- block_sum += end - start - kernel_size;
- }
- if (!block_sum) {
+ if (safe_addr + kernel_size > memory_limit)
+ return 0;
+
+ max_pos = count_valid_kernel_positions(kernel_size, safe_addr, memory_limit);
+ if (!max_pos) {
sclp_early_printk("KASLR disabled: not enough memory\n");
return 0;
}
- base = get_random(block_sum);
- if (base == 0)
+ /* we need a value in the range [1, base_pos] inclusive */
+ if (get_random(max_pos, &base_pos))
return 0;
- if (base < safe_addr)
- base = safe_addr;
- block_sum = offset = 0;
- for_each_mem_detect_block(i, &start, &end) {
- if (memory_limit) {
- if (start >= memory_limit)
- break;
- if (end > memory_limit)
- end = memory_limit;
- }
- if (end - start < kernel_size)
- continue;
- block_sum += end - start - kernel_size;
- if (base <= block_sum) {
- base = start + base - offset;
- base = ALIGN_DOWN(base, THREAD_SIZE);
- break;
- }
- offset = block_sum;
- }
- return base;
+ return position_to_address(base_pos + 1, kernel_size, safe_addr, memory_limit);
}
diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c
index 83b5b7915c32..a3c9862bcede 100644
--- a/arch/s390/boot/pgm_check_info.c
+++ b/arch/s390/boot/pgm_check_info.c
@@ -2,6 +2,7 @@
#include <linux/kernel.h>
#include <linux/string.h>
#include <asm/lowcore.h>
+#include <asm/setup.h>
#include <asm/sclp.h>
#include "boot.h"
@@ -32,7 +33,8 @@ void print_pgm_check_info(void)
char *p;
add_str(buf, "Linux version ");
- strlcat(buf, kernel_version, sizeof(buf));
+ strlcat(buf, kernel_version, sizeof(buf) - 1);
+ strlcat(buf, "\n", sizeof(buf));
sclp_early_printk(buf);
p = add_str(buf, "Kernel fault: interruption code ");
@@ -42,6 +44,13 @@ void print_pgm_check_info(void)
add_str(p, "\n");
sclp_early_printk(buf);
+ if (kaslr_enabled) {
+ p = add_str(buf, "Kernel random base: ");
+ p = add_val_as_hex(p, __kaslr_offset);
+ add_str(p, "\n");
+ sclp_early_printk(buf);
+ }
+
p = add_str(buf, "PSW : ");
p = add_val_as_hex(p, S390_lowcore.psw_save_area.mask);
p = add_str(p, " ");
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 3b3a11f95269..90842936545b 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -48,8 +48,6 @@ struct diag_ops __bootdata_preserved(diag_dma_ops) = {
};
static struct diag210 _diag210_tmp_dma __section(.dma.data);
struct diag210 *__bootdata_preserved(__diag210_tmp_dma) = &_diag210_tmp_dma;
-void _swsusp_reset_dma(void);
-unsigned long __bootdata_preserved(__swsusp_reset_dma) = __pa(_swsusp_reset_dma);
void error(char *x)
{
@@ -120,6 +118,9 @@ static void handle_relocs(unsigned long offset)
}
}
+/*
+ * This function clears the BSS section of the decompressed Linux kernel and NOT the decompressor's.
+ */
static void clear_bss_section(void)
{
memset((void *)vmlinux.default_lma + vmlinux.image_size, 0, vmlinux.bss_size);
diff --git a/arch/s390/boot/text_dma.S b/arch/s390/boot/text_dma.S
index 9715715c4c28..f7c77cd518f2 100644
--- a/arch/s390/boot/text_dma.S
+++ b/arch/s390/boot/text_dma.S
@@ -97,23 +97,6 @@ ENTRY(_diag0c_dma)
ENDPROC(_diag0c_dma)
/*
- * void _swsusp_reset_dma(void)
- */
-ENTRY(_swsusp_reset_dma)
- larl %r1,restart_entry
- larl %r2,.Lrestart_diag308_psw
- og %r1,0(%r2)
- stg %r1,0(%r0)
- lghi %r0,0
- diag %r0,%r0,0x308
-restart_entry:
- lhi %r1,1
- sigp %r1,%r0,SIGP_SET_ARCHITECTURE
- sam64
- BR_EX_DMA_r14
-ENDPROC(_swsusp_reset_dma)
-
-/*
* void _diag308_reset_dma(void)
*
* Calls diag 308 subcode 1 and continues execution
diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c
index f887a479cdc7..a15c033f53ca 100644
--- a/arch/s390/boot/uv.c
+++ b/arch/s390/boot/uv.c
@@ -7,6 +7,9 @@
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
int __bootdata_preserved(prot_virt_guest);
#endif
+#if IS_ENABLED(CONFIG_KVM)
+int __bootdata_preserved(prot_virt_host);
+#endif
struct uv_info __bootdata_preserved(uv_info);
void uv_query_info(void)
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 7228aabe9da6..0784bf3caf43 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -775,6 +775,8 @@ CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_PAGEALLOC=y
CONFIG_PAGE_OWNER=y
CONFIG_DEBUG_RODATA_TEST=y
+CONFIG_DEBUG_WX=y
+CONFIG_PTDUMP_DEBUGFS=y
CONFIG_DEBUG_OBJECTS=y
CONFIG_DEBUG_OBJECTS_SELFTEST=y
CONFIG_DEBUG_OBJECTS_FREE=y
@@ -822,7 +824,6 @@ CONFIG_FTRACE_SYSCALLS=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_BPF_KPROBE_OVERRIDE=y
CONFIG_HIST_TRIGGERS=y
-CONFIG_S390_PTDUMP=y
CONFIG_NOTIFIER_ERROR_INJECTION=m
CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m
CONFIG_FAULT_INJECTION=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index fab03b7a6932..905bc8c4cfaf 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -759,6 +759,8 @@ CONFIG_GDB_SCRIPTS=y
CONFIG_FRAME_WARN=1024
CONFIG_DEBUG_SECTION_MISMATCH=y
CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_WX=y
+CONFIG_PTDUMP_DEBUGFS=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_TEST_LOCKUP=m
@@ -775,7 +777,6 @@ CONFIG_FTRACE_SYSCALLS=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_BPF_KPROBE_OVERRIDE=y
CONFIG_HIST_TRIGGERS=y
-CONFIG_S390_PTDUMP=y
CONFIG_LKDTM=m
CONFIG_PERCPU_TEST=m
CONFIG_ATOMIC64_SELFTEST=y
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h
index 6813bfa1eeb7..a8c02cfbc712 100644
--- a/arch/s390/include/asm/checksum.h
+++ b/arch/s390/include/asm/checksum.h
@@ -13,21 +13,21 @@
#define _S390_CHECKSUM_H
#include <linux/uaccess.h>
+#include <linux/in6.h>
/*
- * computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit)
+ * Computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit).
*
- * returns a 32-bit number suitable for feeding into itself
- * or csum_tcpudp_magic
+ * Returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic.
*
- * this function must be called with even lengths, except
- * for the last fragment, which may be odd
+ * This function must be called with even lengths, except
+ * for the last fragment, which may be odd.
*
- * it's best to have buff aligned on a 32-bit boundary
+ * It's best to have buff aligned on a 32-bit boundary.
*/
-static inline __wsum
-csum_partial(const void *buff, int len, __wsum sum)
+static inline __wsum csum_partial(const void *buff, int len, __wsum sum)
{
register unsigned long reg2 asm("2") = (unsigned long) buff;
register unsigned long reg3 asm("3") = (unsigned long) len;
@@ -40,74 +40,91 @@ csum_partial(const void *buff, int len, __wsum sum)
}
/*
- * Fold a partial checksum without adding pseudo headers
+ * Fold a partial checksum without adding pseudo headers.
*/
static inline __sum16 csum_fold(__wsum sum)
{
u32 csum = (__force u32) sum;
- csum += (csum >> 16) + (csum << 16);
+ csum += (csum >> 16) | (csum << 16);
csum >>= 16;
return (__force __sum16) ~csum;
}
/*
- * This is a version of ip_compute_csum() optimized for IP headers,
- * which always checksum on 4 octet boundaries.
- *
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksums on 4 octet boundaries.
*/
static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
{
- return csum_fold(csum_partial(iph, ihl*4, 0));
+ __u64 csum = 0;
+ __u32 *ptr = (u32 *)iph;
+
+ csum += *ptr++;
+ csum += *ptr++;
+ csum += *ptr++;
+ csum += *ptr++;
+ ihl -= 4;
+ while (ihl--)
+ csum += *ptr++;
+ csum += (csum >> 32) | (csum << 32);
+ return csum_fold((__force __wsum)(csum >> 32));
}
/*
- * computes the checksum of the TCP/UDP pseudo-header
- * returns a 32-bit checksum
+ * Computes the checksum of the TCP/UDP pseudo-header.
+ * Returns a 32-bit checksum.
*/
-static inline __wsum
-csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto,
- __wsum sum)
+static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
+ __u8 proto, __wsum sum)
{
- __u32 csum = (__force __u32)sum;
+ __u64 csum = (__force __u64)sum;
csum += (__force __u32)saddr;
- if (csum < (__force __u32)saddr)
- csum++;
-
csum += (__force __u32)daddr;
- if (csum < (__force __u32)daddr)
- csum++;
-
- csum += len + proto;
- if (csum < len + proto)
- csum++;
-
- return (__force __wsum)csum;
+ csum += len;
+ csum += proto;
+ csum += (csum >> 32) | (csum << 32);
+ return (__force __wsum)(csum >> 32);
}
/*
- * computes the checksum of the TCP/UDP pseudo-header
- * returns a 16-bit checksum, already complemented
+ * Computes the checksum of the TCP/UDP pseudo-header.
+ * Returns a 16-bit checksum, already complemented.
*/
-
-static inline __sum16
-csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto,
- __wsum sum)
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
+ __u8 proto, __wsum sum)
{
- return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
+ return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
}
/*
- * this routine is used for miscellaneous IP-like checksums, mainly
- * in icmp.c
+ * Used for miscellaneous IP-like checksums, mainly icmp.
*/
-
static inline __sum16 ip_compute_csum(const void *buff, int len)
{
return csum_fold(csum_partial(buff, len, 0));
}
-#endif /* _S390_CHECKSUM_H */
-
+#define _HAVE_ARCH_IPV6_CSUM
+static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto, __wsum csum)
+{
+ __u64 sum = (__force __u64)csum;
+
+ sum += (__force __u32)saddr->s6_addr32[0];
+ sum += (__force __u32)saddr->s6_addr32[1];
+ sum += (__force __u32)saddr->s6_addr32[2];
+ sum += (__force __u32)saddr->s6_addr32[3];
+ sum += (__force __u32)daddr->s6_addr32[0];
+ sum += (__force __u32)daddr->s6_addr32[1];
+ sum += (__force __u32)daddr->s6_addr32[2];
+ sum += (__force __u32)daddr->s6_addr32[3];
+ sum += len;
+ sum += proto;
+ sum += (sum >> 32) | (sum << 32);
+ return csum_fold((__force __wsum)(sum >> 32));
+}
+#endif /* _S390_CHECKSUM_H */
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index b5bfb3123cb1..5c58756d6476 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -356,7 +356,6 @@ static inline u8 pathmask_to_pos(u8 mask)
return 8 - ffs(mask);
}
-void channel_subsystem_reinit(void);
extern void css_schedule_reprobe(void);
extern void *cio_dma_zalloc(size_t size);
@@ -372,6 +371,7 @@ struct gen_pool *cio_gp_dma_create(struct device *dma_dev, int nr_pages);
/* Function from drivers/s390/cio/chsc.c */
int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta);
int chsc_sstpi(void *page, void *result, size_t size);
+int chsc_stzi(void *page, void *result, size_t size);
int chsc_sgib(u32 origin);
#endif
diff --git a/arch/s390/include/asm/clocksource.h b/arch/s390/include/asm/clocksource.h
new file mode 100644
index 000000000000..03434369fce4
--- /dev/null
+++ b/arch/s390/include/asm/clocksource.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* s390-specific clocksource additions */
+
+#ifndef _ASM_S390_CLOCKSOURCE_H
+#define _ASM_S390_CLOCKSOURCE_H
+
+#endif /* _ASM_S390_CLOCKSOURCE_H */
diff --git a/arch/s390/include/asm/clp.h b/arch/s390/include/asm/clp.h
index 3925b0f085b7..10919eeb7533 100644
--- a/arch/s390/include/asm/clp.h
+++ b/arch/s390/include/asm/clp.h
@@ -5,6 +5,9 @@
/* CLP common request & response block size */
#define CLP_BLK_SIZE PAGE_SIZE
+/* Call Logical Processor - Command Code */
+#define CLP_SLPC 0x0001
+
#define CLP_LPS_BASE 0
#define CLP_LPS_PCI 2
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index a816fb4734b8..40264f60b0da 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -140,8 +140,6 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte);
void gmap_register_pte_notifier(struct gmap_notifier *);
void gmap_unregister_pte_notifier(struct gmap_notifier *);
-void gmap_pte_notify(struct mm_struct *, unsigned long addr, pte_t *,
- unsigned long bits);
int gmap_mprotect_notify(struct gmap *, unsigned long start,
unsigned long len, int prot);
diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
index da014e4f8113..28664ee0abc1 100644
--- a/arch/s390/include/asm/io.h
+++ b/arch/s390/include/asm/io.h
@@ -12,6 +12,7 @@
#include <linux/kernel.h>
#include <asm/page.h>
+#include <asm/pgtable.h>
#include <asm/pci_io.h>
#define xlate_dev_mem_ptr xlate_dev_mem_ptr
@@ -26,7 +27,10 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
#define IO_SPACE_LIMIT 0
+void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot);
void __iomem *ioremap(phys_addr_t addr, size_t size);
+void __iomem *ioremap_wc(phys_addr_t addr, size_t size);
+void __iomem *ioremap_wt(phys_addr_t addr, size_t size);
void iounmap(volatile void __iomem *addr);
static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
@@ -52,6 +56,10 @@ static inline void ioport_unmap(void __iomem *p)
#define pci_iomap_wc pci_iomap_wc
#define pci_iomap_wc_range pci_iomap_wc_range
+#define ioremap ioremap
+#define ioremap_wt ioremap_wt
+#define ioremap_wc ioremap_wc
+
#define memcpy_fromio(dst, src, count) zpci_memcpy_fromio(dst, src, count)
#define memcpy_toio(dst, src, count) zpci_memcpy_toio(dst, src, count)
#define memset_io(dst, val, count) zpci_memset_io(dst, val, count)
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index 7d5cfdda5277..a9e2c7295b35 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -66,6 +66,7 @@ enum ipl_type {
IPL_TYPE_FCP_DUMP = 8,
IPL_TYPE_NSS = 16,
IPL_TYPE_NVME = 32,
+ IPL_TYPE_NVME_DUMP = 64,
};
struct ipl_info
@@ -94,6 +95,12 @@ extern struct ipl_info ipl_info;
extern void setup_ipl(void);
extern void set_os_info_reipl_block(void);
+static inline bool is_ipl_type_dump(void)
+{
+ return (ipl_info.type == IPL_TYPE_FCP_DUMP) ||
+ (ipl_info.type == IPL_TYPE_NVME_DUMP);
+}
+
struct ipl_report {
struct ipl_parameter_block *ipib;
struct list_head components;
diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h
index 89d6886040c8..e9bf486de136 100644
--- a/arch/s390/include/asm/kasan.h
+++ b/arch/s390/include/asm/kasan.h
@@ -19,6 +19,7 @@
extern void kasan_early_init(void);
extern void kasan_copy_shadow(pgd_t *dst);
extern void kasan_free_early_identity(void);
+extern unsigned long kasan_vmax;
#else
static inline void kasan_early_init(void) { }
static inline void kasan_copy_shadow(pgd_t *dst) { }
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 99b92c3e46b0..b5380a251df2 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -208,9 +208,8 @@ int zpci_unregister_ioat(struct zpci_dev *, u8);
void zpci_remove_reserved_devices(void);
/* CLP */
+int clp_setup_writeback_mio(void);
int clp_scan_pci_devices(void);
-int clp_rescan_pci_devices(void);
-int clp_rescan_pci_devices_simple(u32 *fid);
int clp_add_pci_device(u32, u32, int);
int clp_enable_fh(struct zpci_dev *, u8);
int clp_disable_fh(struct zpci_dev *);
@@ -232,12 +231,10 @@ static inline bool zpci_use_mio(struct zpci_dev *zdev)
/* Error handling and recovery */
void zpci_event_error(void *);
void zpci_event_availability(void *);
-void zpci_rescan(void);
bool zpci_is_enabled(void);
#else /* CONFIG_PCI */
static inline void zpci_event_error(void *e) {}
static inline void zpci_event_availability(void *e) {}
-static inline void zpci_rescan(void) {}
#endif /* CONFIG_PCI */
#ifdef CONFIG_HOTPLUG_PCI_S390
@@ -282,7 +279,6 @@ int zpci_debug_init(void);
void zpci_debug_exit(void);
void zpci_debug_init_device(struct zpci_dev *, const char *);
void zpci_debug_exit_device(struct zpci_dev *);
-void zpci_debug_info(struct zpci_dev *, struct seq_file *);
/* Error reporting */
int zpci_report_error(struct pci_dev *, struct zpci_report_error_header *);
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index eb51272dd2cc..1f4b666e85ee 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -7,6 +7,7 @@
/*
* Call Logical Processor - Command Codes
*/
+#define CLP_SLPC 0x0001
#define CLP_LIST_PCI 0x0002
#define CLP_QUERY_PCI_FN 0x0003
#define CLP_QUERY_PCI_FNGRP 0x0004
@@ -51,6 +52,19 @@ struct clp_fh_list_entry {
extern bool zpci_unique_uid;
+struct clp_rsp_slpc_pci {
+ struct clp_rsp_hdr hdr;
+ u32 reserved2[4];
+ u32 lpif[8];
+ u32 reserved3[4];
+ u32 vwb : 1;
+ u32 : 1;
+ u32 mio_wb : 6;
+ u32 : 24;
+ u32 reserved5[3];
+ u32 lpic[8];
+} __packed;
+
/* List PCI functions request */
struct clp_req_list_pci {
struct clp_req_hdr hdr;
@@ -172,6 +186,11 @@ struct clp_rsp_set_pci {
} __packed;
/* Combined request/response block structures used by clp insn */
+struct clp_req_rsp_slpc_pci {
+ struct clp_req_slpc request;
+ struct clp_rsp_slpc_pci response;
+} __packed;
+
struct clp_req_rsp_list_pci {
struct clp_req_list_pci request;
struct clp_rsp_list_pci response;
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 74a352f8c0d1..d1297d6bbdcf 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -146,8 +146,6 @@ static inline void pmd_populate(struct mm_struct *mm,
#define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
#define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
-extern void rcu_table_freelist_finish(void);
-
void vmem_map_init(void);
void *vmem_crst_alloc(unsigned long val);
pte_t *vmem_pte_alloc(void);
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index b55561cc8786..6b8d8c69b1a1 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -89,6 +89,7 @@ extern unsigned long VMALLOC_START;
extern unsigned long VMALLOC_END;
#define VMALLOC_DEFAULT_SIZE ((128UL << 30) - MODULES_LEN)
extern struct page *vmemmap;
+extern unsigned long vmemmap_size;
#define VMEM_MAX_PHYS ((unsigned long) vmemmap)
@@ -1186,6 +1187,12 @@ void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr);
void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr);
void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr);
+#define pgprot_writecombine pgprot_writecombine
+pgprot_t pgprot_writecombine(pgprot_t prot);
+
+#define pgprot_writethrough pgprot_writethrough
+pgprot_t pgprot_writethrough(pgprot_t prot);
+
/*
* Certain architectures need to do special things when PTEs
* within a page table are directly modified. Thus, the following
@@ -1209,7 +1216,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
{
pte_t __pte;
- pte_val(__pte) = physpage + pgprot_val(pgprot);
+
+ pte_val(__pte) = physpage | pgprot_val(pgprot);
if (!MACHINE_HAS_NX)
pte_val(__pte) &= ~_PAGE_NOEXEC;
return pte_mkyoung(__pte);
diff --git a/arch/s390/include/asm/ptdump.h b/arch/s390/include/asm/ptdump.h
new file mode 100644
index 000000000000..f960b2896606
--- /dev/null
+++ b/arch/s390/include/asm/ptdump.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_PTDUMP_H
+#define _ASM_S390_PTDUMP_H
+
+void ptdump_check_wx(void);
+
+static inline void debug_checkwx(void)
+{
+ if (IS_ENABLED(CONFIG_DEBUG_WX))
+ ptdump_check_wx();
+}
+
+#endif /* _ASM_S390_PTDUMP_H */
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index e69dbf438f99..19e84c95d1e7 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -26,9 +26,9 @@
/**
* struct qdesfmt0 - queue descriptor, format 0
- * @sliba: storage list information block address
- * @sla: storage list address
- * @slsba: storage list state block address
+ * @sliba: absolute address of storage list information block
+ * @sla: absolute address of storage list
+ * @slsba: absolute address of storage list state block
* @akey: access key for SLIB
* @bkey: access key for SL
* @ckey: access key for SBALs
@@ -56,7 +56,7 @@ struct qdesfmt0 {
* @oqdcnt: output queue descriptor count
* @iqdsz: input queue descriptor size
* @oqdsz: output queue descriptor size
- * @qiba: queue information block address
+ * @qiba: absolute address of queue information block
* @qkey: queue information block key
* @qdf0: queue descriptions
*/
@@ -327,7 +327,6 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int,
* struct qdio_initialize - qdio initialization data
* @q_format: queue format
* @qdr_ac: feature flags to set
- * @adapter_name: name for the adapter
* @qib_param_field_format: format for qib_parm_field
* @qib_param_field: pointer to 128 bytes or NULL, if no param field
* @qib_rflags: rflags to set
@@ -347,7 +346,6 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int,
struct qdio_initialize {
unsigned char q_format;
unsigned char qdr_ac;
- unsigned char adapter_name[8];
unsigned int qib_param_field_format;
unsigned char *qib_param_field;
unsigned char qib_rflags;
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index c563f8368b19..a7bdd128d85b 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -114,8 +114,7 @@ int sclp_early_get_core_info(struct sclp_core_info *info);
void sclp_early_get_ipl_info(struct sclp_ipl_info *info);
void sclp_early_detect(void);
void sclp_early_printk(const char *s);
-void sclp_early_printk_force(const char *s);
-void __sclp_early_printk(const char *s, unsigned int len, unsigned int force);
+void __sclp_early_printk(const char *s, unsigned int len);
int sclp_early_get_memsize(unsigned long *mem);
int sclp_early_get_hsa_size(unsigned long *hsa_size);
@@ -129,6 +128,8 @@ int sclp_chp_deconfigure(struct chp_id chpid);
int sclp_chp_read_info(struct sclp_chp_info *info);
int sclp_pci_configure(u32 fid);
int sclp_pci_deconfigure(u32 fid);
+int sclp_ap_configure(u32 apid);
+int sclp_ap_deconfigure(u32 apid);
int sclp_pci_report(struct zpci_report_error_header *report, u32 fh, u32 fid);
int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count);
int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count);
diff --git a/arch/s390/include/asm/set_memory.h b/arch/s390/include/asm/set_memory.h
index c59a83536c70..a22a5a81811c 100644
--- a/arch/s390/include/asm/set_memory.h
+++ b/arch/s390/include/asm/set_memory.h
@@ -2,6 +2,10 @@
#ifndef _ASMS390_SET_MEMORY_H
#define _ASMS390_SET_MEMORY_H
+#include <linux/mutex.h>
+
+extern struct mutex cpa_mutex;
+
#define SET_MEMORY_RO 1UL
#define SET_MEMORY_RW 2UL
#define SET_MEMORY_NX 4UL
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 534f212753d6..bdb242a1544e 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -92,7 +92,9 @@ extern int memory_end_set;
extern unsigned long memory_end;
extern unsigned long vmalloc_size;
extern unsigned long max_physmem_end;
-extern unsigned long __swsusp_reset_dma;
+
+/* The Write Back bit position in the physaddr is given by the SLPC PCI */
+extern unsigned long mio_wb_bit_mask;
#define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM)
#define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM)
@@ -119,9 +121,6 @@ extern unsigned int console_mode;
extern unsigned int console_devno;
extern unsigned int console_irq;
-extern char vmhalt_cmd[];
-extern char vmpoff_cmd[];
-
#define CONSOLE_IS_UNDEFINED (console_mode == 0)
#define CONSOLE_IS_SCLP (console_mode == 1)
#define CONSOLE_IS_3215 (console_mode == 2)
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 7e155fb6c254..01e360004481 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -31,7 +31,6 @@ extern void smp_emergency_stop(void);
extern int smp_find_processor_id(u16 address);
extern int smp_store_status(int cpu);
extern void smp_save_dump_cpus(void);
-extern int smp_vcpu_scheduled(int cpu);
extern void smp_yield_cpu(int cpu);
extern void smp_cpu_set_polarization(int cpu, int val);
extern int smp_cpu_get_polarization(int cpu);
diff --git a/arch/s390/include/asm/stp.h b/arch/s390/include/asm/stp.h
index f0ddefb06ec8..ba07463897c1 100644
--- a/arch/s390/include/asm/stp.h
+++ b/arch/s390/include/asm/stp.h
@@ -6,43 +6,89 @@
#ifndef __S390_STP_H
#define __S390_STP_H
+#include <linux/compiler.h>
+
/* notifier for syncs */
extern struct atomic_notifier_head s390_epoch_delta_notifier;
/* STP interruption parameter */
struct stp_irq_parm {
- unsigned int _pad0 : 14;
- unsigned int tsc : 1; /* Timing status change */
- unsigned int lac : 1; /* Link availability change */
- unsigned int tcpc : 1; /* Time control parameter change */
- unsigned int _pad2 : 15;
-} __attribute__ ((packed));
+ u32 : 14;
+ u32 tsc : 1; /* Timing status change */
+ u32 lac : 1; /* Link availability change */
+ u32 tcpc : 1; /* Time control parameter change */
+ u32 : 15;
+} __packed;
#define STP_OP_SYNC 1
#define STP_OP_CTRL 3
struct stp_sstpi {
- unsigned int rsvd0;
- unsigned int rsvd1 : 8;
- unsigned int stratum : 8;
- unsigned int vbits : 16;
- unsigned int leaps : 16;
- unsigned int tmd : 4;
- unsigned int ctn : 4;
- unsigned int rsvd2 : 3;
- unsigned int c : 1;
- unsigned int tst : 4;
- unsigned int tzo : 16;
- unsigned int dsto : 16;
- unsigned int ctrl : 16;
- unsigned int rsvd3 : 16;
- unsigned int tto;
- unsigned int rsvd4;
- unsigned int ctnid[3];
- unsigned int rsvd5;
- unsigned int todoff[4];
- unsigned int rsvd6[48];
-} __attribute__ ((packed));
+ u32 : 32;
+ u32 tu : 1;
+ u32 lu : 1;
+ u32 : 6;
+ u32 stratum : 8;
+ u32 vbits : 16;
+ u32 leaps : 16;
+ u32 tmd : 4;
+ u32 ctn : 4;
+ u32 : 3;
+ u32 c : 1;
+ u32 tst : 4;
+ u32 tzo : 16;
+ u32 dsto : 16;
+ u32 ctrl : 16;
+ u32 : 16;
+ u32 tto;
+ u32 : 32;
+ u32 ctnid[3];
+ u32 : 32;
+ u32 todoff[4];
+ u32 rsvd[48];
+} __packed;
+
+struct stp_tzib {
+ u32 tzan : 16;
+ u32 : 16;
+ u32 tzo : 16;
+ u32 dsto : 16;
+ u32 stn;
+ u32 dstn;
+ u64 dst_on_alg;
+ u64 dst_off_alg;
+} __packed;
+
+struct stp_tcpib {
+ u32 atcode : 4;
+ u32 ntcode : 4;
+ u32 d : 1;
+ u32 : 23;
+ s32 tto;
+ struct stp_tzib atzib;
+ struct stp_tzib ntzib;
+ s32 adst_offset : 16;
+ s32 ndst_offset : 16;
+ u32 rsvd1;
+ u64 ntzib_update;
+ u64 ndsto_update;
+} __packed;
+
+struct stp_lsoib {
+ u32 p : 1;
+ u32 : 31;
+ s32 also : 16;
+ s32 nlso : 16;
+ u64 nlsout;
+} __packed;
+
+struct stp_stzi {
+ u32 rsvd0[3];
+ u64 data_ts;
+ u32 rsvd1[22];
+ struct stp_tcpib tcpib;
+ struct stp_lsoib lsoib;
+} __packed;
/* Functions needed by the machine check handler */
int stp_sync_check(void);
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index acce6a08a1fa..6448bb5be10c 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -30,8 +30,6 @@ static inline void __tlb_flush_idte(unsigned long asce)
: : "a" (opt), "a" (asce) : "cc");
}
-void smp_ptlb_all(void);
-
/*
* Flush all TLB entries on all CPUs.
*/
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index f09444d6aeab..c868e7ee49b3 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -60,6 +60,9 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n);
#define INLINE_COPY_TO_USER
#endif
+int __put_user_bad(void) __attribute__((noreturn));
+int __get_user_bad(void) __attribute__((noreturn));
+
#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
#define __put_get_user_asm(to, from, size, spec) \
@@ -109,6 +112,9 @@ static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned lon
(unsigned long *)x,
size, spec);
break;
+ default:
+ __put_user_bad();
+ break;
}
return rc;
}
@@ -139,6 +145,9 @@ static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsign
(unsigned long __user *)ptr,
size, spec);
break;
+ default:
+ __get_user_bad();
+ break;
}
return rc;
}
@@ -179,7 +188,7 @@ static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long s
default: \
__put_user_bad(); \
break; \
- } \
+ } \
__builtin_expect(__pu_err, 0); \
})
@@ -190,8 +199,6 @@ static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long s
})
-int __put_user_bad(void) __attribute__((noreturn));
-
#define __get_user(x, ptr) \
({ \
int __gu_err = -EFAULT; \
@@ -238,8 +245,6 @@ int __put_user_bad(void) __attribute__((noreturn));
__get_user(x, ptr); \
})
-int __get_user_bad(void) __attribute__((noreturn));
-
unsigned long __must_check
raw_copy_in_user(void __user *to, const void __user *from, unsigned long n);
@@ -278,4 +283,115 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo
int copy_to_user_real(void __user *dest, void *src, unsigned long count);
void *s390_kernel_write(void *dst, const void *src, size_t size);
+#define HAVE_GET_KERNEL_NOFAULT
+
+int __noreturn __put_kernel_bad(void);
+
+#define __put_kernel_asm(val, to, insn) \
+({ \
+ int __rc; \
+ \
+ asm volatile( \
+ "0: " insn " %2,%1\n" \
+ "1: xr %0,%0\n" \
+ "2:\n" \
+ ".pushsection .fixup, \"ax\"\n" \
+ "3: lhi %0,%3\n" \
+ " jg 2b\n" \
+ ".popsection\n" \
+ EX_TABLE(0b,3b) EX_TABLE(1b,3b) \
+ : "=d" (__rc), "+Q" (*(to)) \
+ : "d" (val), "K" (-EFAULT) \
+ : "cc"); \
+ __rc; \
+})
+
+#define __put_kernel_nofault(dst, src, type, err_label) \
+do { \
+ u64 __x = (u64)(*((type *)(src))); \
+ int __pk_err; \
+ \
+ switch (sizeof(type)) { \
+ case 1: \
+ __pk_err = __put_kernel_asm(__x, (type *)(dst), "stc"); \
+ break; \
+ case 2: \
+ __pk_err = __put_kernel_asm(__x, (type *)(dst), "sth"); \
+ break; \
+ case 4: \
+ __pk_err = __put_kernel_asm(__x, (type *)(dst), "st"); \
+ break; \
+ case 8: \
+ __pk_err = __put_kernel_asm(__x, (type *)(dst), "stg"); \
+ break; \
+ default: \
+ __pk_err = __put_kernel_bad(); \
+ break; \
+ } \
+ if (unlikely(__pk_err)) \
+ goto err_label; \
+} while (0)
+
+int __noreturn __get_kernel_bad(void);
+
+#define __get_kernel_asm(val, from, insn) \
+({ \
+ int __rc; \
+ \
+ asm volatile( \
+ "0: " insn " %1,%2\n" \
+ "1: xr %0,%0\n" \
+ "2:\n" \
+ ".pushsection .fixup, \"ax\"\n" \
+ "3: lhi %0,%3\n" \
+ " jg 2b\n" \
+ ".popsection\n" \
+ EX_TABLE(0b,3b) EX_TABLE(1b,3b) \
+ : "=d" (__rc), "+d" (val) \
+ : "Q" (*(from)), "K" (-EFAULT) \
+ : "cc"); \
+ __rc; \
+})
+
+#define __get_kernel_nofault(dst, src, type, err_label) \
+do { \
+ int __gk_err; \
+ \
+ switch (sizeof(type)) { \
+ case 1: { \
+ u8 __x = 0; \
+ \
+ __gk_err = __get_kernel_asm(__x, (type *)(src), "ic"); \
+ *((type *)(dst)) = (type)__x; \
+ break; \
+ }; \
+ case 2: { \
+ u16 __x = 0; \
+ \
+ __gk_err = __get_kernel_asm(__x, (type *)(src), "lh"); \
+ *((type *)(dst)) = (type)__x; \
+ break; \
+ }; \
+ case 4: { \
+ u32 __x = 0; \
+ \
+ __gk_err = __get_kernel_asm(__x, (type *)(src), "l"); \
+ *((type *)(dst)) = (type)__x; \
+ break; \
+ }; \
+ case 8: { \
+ u64 __x = 0; \
+ \
+ __gk_err = __get_kernel_asm(__x, (type *)(src), "lg"); \
+ *((type *)(dst)) = (type)__x; \
+ break; \
+ }; \
+ default: \
+ __gk_err = __get_kernel_bad(); \
+ break; \
+ } \
+ if (unlikely(__gk_err)) \
+ goto err_label; \
+} while (0)
+
#endif /* __S390_UACCESS_H */
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index cff4b4c99b75..0325fc0469b7 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -33,6 +33,7 @@
#define UVC_CMD_DESTROY_SEC_CPU 0x0121
#define UVC_CMD_CONV_TO_SEC_STOR 0x0200
#define UVC_CMD_CONV_FROM_SEC_STOR 0x0201
+#define UVC_CMD_DESTR_SEC_STOR 0x0202
#define UVC_CMD_SET_SEC_CONF_PARAMS 0x0300
#define UVC_CMD_UNPACK_IMG 0x0301
#define UVC_CMD_VERIFY_IMG 0x0302
@@ -344,6 +345,7 @@ static inline int is_prot_virt_host(void)
}
int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
+int uv_destroy_page(unsigned long paddr);
int uv_convert_from_secure(unsigned long paddr);
int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
@@ -354,6 +356,11 @@ void adjust_to_uv_max(unsigned long *vmax);
static inline void setup_uv(void) {}
static inline void adjust_to_uv_max(unsigned long *vmax) {}
+static inline int uv_destroy_page(unsigned long paddr)
+{
+ return 0;
+}
+
static inline int uv_convert_from_secure(unsigned long paddr)
{
return 0;
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
index 0cd085cdeb4f..29b44a930e71 100644
--- a/arch/s390/include/asm/vdso.h
+++ b/arch/s390/include/asm/vdso.h
@@ -2,6 +2,8 @@
#ifndef __S390_VDSO_H__
#define __S390_VDSO_H__
+#include <vdso/datapage.h>
+
/* Default link addresses for the vDSOs */
#define VDSO32_LBASE 0
#define VDSO64_LBASE 0
@@ -18,30 +20,7 @@
* itself and may change without notice.
*/
-struct vdso_data {
- __u64 tb_update_count; /* Timebase atomicity ctr 0x00 */
- __u64 xtime_tod_stamp; /* TOD clock for xtime 0x08 */
- __u64 xtime_clock_sec; /* Kernel time 0x10 */
- __u64 xtime_clock_nsec; /* 0x18 */
- __u64 xtime_coarse_sec; /* Coarse kernel time 0x20 */
- __u64 xtime_coarse_nsec; /* 0x28 */
- __u64 wtom_clock_sec; /* Wall to monotonic clock 0x30 */
- __u64 wtom_clock_nsec; /* 0x38 */
- __u64 wtom_coarse_sec; /* Coarse wall to monotonic 0x40 */
- __u64 wtom_coarse_nsec; /* 0x48 */
- __u32 tz_minuteswest; /* Minutes west of Greenwich 0x50 */
- __u32 tz_dsttime; /* Type of dst correction 0x54 */
- __u32 ectg_available; /* ECTG instruction present 0x58 */
- __u32 tk_mult; /* Mult. used for xtime_nsec 0x5c */
- __u32 tk_shift; /* Shift used for xtime_nsec 0x60 */
- __u32 ts_dir; /* TOD steering direction 0x64 */
- __u64 ts_end; /* TOD steering end 0x68 */
- __u32 hrtimer_res; /* hrtimer resolution 0x70 */
-};
-
struct vdso_per_cpu_data {
- __u64 ectg_timer_base;
- __u64 ectg_user_time;
/*
* Note: node_id and cpu_nr must be at adjacent memory locations.
* VDSO userspace must read both values with a single instruction.
@@ -56,9 +35,7 @@ struct vdso_per_cpu_data {
};
extern struct vdso_data *vdso_data;
-extern struct vdso_data boot_vdso_data;
-void vdso_alloc_boot_cpu(struct lowcore *lowcore);
int vdso_alloc_per_cpu(struct lowcore *lowcore);
void vdso_free_per_cpu(struct lowcore *lowcore);
diff --git a/arch/s390/include/asm/vdso/clocksource.h b/arch/s390/include/asm/vdso/clocksource.h
new file mode 100644
index 000000000000..a93eda0ce7bb
--- /dev/null
+++ b/arch/s390/include/asm/vdso/clocksource.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_CLOCKSOURCE_H
+#define __ASM_VDSO_CLOCKSOURCE_H
+
+#define VDSO_ARCH_CLOCKMODES \
+ VDSO_CLOCKMODE_TOD
+
+#endif /* __ASM_VDSO_CLOCKSOURCE_H */
diff --git a/arch/s390/include/asm/vdso/data.h b/arch/s390/include/asm/vdso/data.h
new file mode 100644
index 000000000000..7b3cdb4a5f48
--- /dev/null
+++ b/arch/s390/include/asm/vdso/data.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __S390_ASM_VDSO_DATA_H
+#define __S390_ASM_VDSO_DATA_H
+
+#include <linux/types.h>
+#include <vdso/datapage.h>
+
+struct arch_vdso_data {
+ __u64 tod_steering_delta;
+ __u64 tod_steering_end;
+};
+
+#endif /* __S390_ASM_VDSO_DATA_H */
diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h
new file mode 100644
index 000000000000..bf123065ad3b
--- /dev/null
+++ b/arch/s390/include/asm/vdso/gettimeofday.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ASM_VDSO_GETTIMEOFDAY_H
+#define ASM_VDSO_GETTIMEOFDAY_H
+
+#define VDSO_HAS_TIME 1
+
+#define VDSO_HAS_CLOCK_GETRES 1
+
+#include <asm/timex.h>
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+#include <linux/compiler.h>
+
+#define vdso_calc_delta __arch_vdso_calc_delta
+static __always_inline u64 __arch_vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
+{
+ return (cycles - last) * mult;
+}
+
+static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
+{
+ return _vdso_data;
+}
+
+static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data *vd)
+{
+ const struct vdso_data *vdso = __arch_get_vdso_data();
+ u64 adj, now;
+
+ now = get_tod_clock();
+ adj = vdso->arch_data.tod_steering_end - now;
+ if (unlikely((s64) adj > 0))
+ now += (vdso->arch_data.tod_steering_delta < 0) ? (adj >> 15) : -(adj >> 15);
+ return now;
+}
+
+static __always_inline
+long clock_gettime_fallback(clockid_t clkid, struct __kernel_timespec *ts)
+{
+ register unsigned long r1 __asm__("r1") = __NR_clock_gettime;
+ register unsigned long r2 __asm__("r2") = (unsigned long)clkid;
+ register void *r3 __asm__("r3") = ts;
+
+ asm ("svc 0\n" : "+d" (r2) : "d" (r1), "d" (r3) : "cc", "memory");
+ return r2;
+}
+
+static __always_inline
+long gettimeofday_fallback(register struct __kernel_old_timeval *tv,
+ register struct timezone *tz)
+{
+ register unsigned long r1 __asm__("r1") = __NR_gettimeofday;
+ register unsigned long r2 __asm__("r2") = (unsigned long)tv;
+ register void *r3 __asm__("r3") = tz;
+
+ asm ("svc 0\n" : "+d" (r2) : "d" (r1), "d" (r3) : "cc", "memory");
+ return r2;
+}
+
+static __always_inline
+long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts)
+{
+ register unsigned long r1 __asm__("r1") = __NR_clock_getres;
+ register unsigned long r2 __asm__("r2") = (unsigned long)clkid;
+ register void *r3 __asm__("r3") = ts;
+
+ asm ("svc 0\n" : "+d" (r2) : "d" (r1), "d" (r3) : "cc", "memory");
+ return r2;
+}
+
+#endif
diff --git a/arch/s390/include/asm/vdso/processor.h b/arch/s390/include/asm/vdso/processor.h
new file mode 100644
index 000000000000..cfcc3e117c4c
--- /dev/null
+++ b/arch/s390/include/asm/vdso/processor.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_VDSO_PROCESSOR_H
+#define __ASM_VDSO_PROCESSOR_H
+
+#define cpu_relax() barrier()
+
+#endif /* __ASM_VDSO_PROCESSOR_H */
diff --git a/arch/s390/include/asm/vdso/vdso.h b/arch/s390/include/asm/vdso/vdso.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/arch/s390/include/asm/vdso/vdso.h
diff --git a/arch/s390/include/asm/vdso/vsyscall.h b/arch/s390/include/asm/vdso/vsyscall.h
new file mode 100644
index 000000000000..6c67c08cefdd
--- /dev/null
+++ b/arch/s390/include/asm/vdso/vsyscall.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_VSYSCALL_H
+#define __ASM_VDSO_VSYSCALL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/hrtimer.h>
+#include <linux/timekeeper_internal.h>
+#include <vdso/datapage.h>
+#include <asm/vdso.h>
+/*
+ * Update the vDSO data page to keep in sync with kernel timekeeping.
+ */
+
+static __always_inline struct vdso_data *__s390_get_k_vdso_data(void)
+{
+ return vdso_data;
+}
+#define __arch_get_k_vdso_data __s390_get_k_vdso_data
+
+/* The asm-generic header needs to be included after the definitions above */
+#include <asm-generic/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/s390/include/asm/vtimer.h b/arch/s390/include/asm/vtimer.h
index 42f707d1c1e8..e601adaa6320 100644
--- a/arch/s390/include/asm/vtimer.h
+++ b/arch/s390/include/asm/vtimer.h
@@ -25,8 +25,6 @@ extern void add_virt_timer_periodic(struct vtimer_list *timer);
extern int mod_virt_timer(struct vtimer_list *timer, u64 expires);
extern int mod_virt_timer_periodic(struct vtimer_list *timer, u64 expires);
extern int del_virt_timer(struct vtimer_list *timer);
-
-extern void init_cpu_vtimer(void);
extern void vtime_init(void);
#endif /* _ASM_S390_TIMER_H */
diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h
index d27d7d329263..7349e96d28a0 100644
--- a/arch/s390/include/uapi/asm/pkey.h
+++ b/arch/s390/include/uapi/asm/pkey.h
@@ -35,12 +35,16 @@
#define PKEY_KEYTYPE_AES_128 1
#define PKEY_KEYTYPE_AES_192 2
#define PKEY_KEYTYPE_AES_256 3
+#define PKEY_KEYTYPE_ECC 4
/* the newer ioctls use a pkey_key_type enum for type information */
enum pkey_key_type {
PKEY_TYPE_CCA_DATA = (__u32) 1,
PKEY_TYPE_CCA_CIPHER = (__u32) 2,
PKEY_TYPE_EP11 = (__u32) 3,
+ PKEY_TYPE_CCA_ECC = (__u32) 0x1f,
+ PKEY_TYPE_EP11_AES = (__u32) 6,
+ PKEY_TYPE_EP11_ECC = (__u32) 7,
};
/* the newer ioctls use a pkey_key_size enum for key size information */
@@ -89,6 +93,20 @@ struct pkey_clrkey {
};
/*
+ * EP11 key blobs of type PKEY_TYPE_EP11_AES and PKEY_TYPE_EP11_ECC
+ * are ep11 blobs prepended by this header:
+ */
+struct ep11kblob_header {
+ __u8 type; /* always 0x00 */
+ __u8 hver; /* header version, currently needs to be 0x00 */
+ __u16 len; /* total length in bytes (including this header) */
+ __u8 version; /* PKEY_TYPE_EP11_AES or PKEY_TYPE_EP11_ECC */
+ __u8 res0; /* unused */
+ __u16 bitlen; /* clear key bit len, 0 for unknown */
+ __u8 res1[8]; /* unused */
+} __packed;
+
+/*
* Generate CCA AES secure key.
*/
struct pkey_genseck {
@@ -304,7 +322,7 @@ struct pkey_verifykey2 {
#define PKEY_VERIFYKEY2 _IOWR(PKEY_IOCTL_MAGIC, 0x17, struct pkey_verifykey2)
/*
- * Transform a key blob (of any type) into a protected key, version 2.
+ * Transform a key blob into a protected key, version 2.
* There needs to be a list of apqns given with at least one entry in there.
* All apqns in the list need to be exact apqns, 0xFFFF as ANY card or domain
* is not supported. The implementation walks through the list of apqns and
@@ -313,6 +331,8 @@ struct pkey_verifykey2 {
* list is tried until success (return 0) or the end of the list is reached
* (return -1 with errno ENODEV). You may use the PKEY_APQNS4K ioctl to
* generate a list of apqns based on the key.
+ * Deriving ECC protected keys from ECC secure keys is not supported with
+ * this ioctl, use PKEY_KBLOB2PROTK3 for this purpose.
*/
struct pkey_kblob2pkey2 {
__u8 __user *key; /* in: pointer to key blob */
@@ -326,17 +346,17 @@ struct pkey_kblob2pkey2 {
/*
* Build a list of APQNs based on a key blob given.
* Is able to find out which type of secure key is given (CCA AES secure
- * key, CCA AES cipher key or EP11 AES key) and tries to find all matching
- * crypto cards based on the MKVP and maybe other criterias (like CCA AES
- * cipher keys need a CEX5C or higher, EP11 keys with BLOB_PKEY_EXTRACTABLE
- * need a CEX7 and EP11 api version 4). The list of APQNs is further filtered
- * by the key's mkvp which needs to match to either the current mkvp (CCA and
- * EP11) or the alternate mkvp (old mkvp, CCA adapters only) of the apqns. The
- * flags argument may be used to limit the matching apqns. If the
- * PKEY_FLAGS_MATCH_CUR_MKVP is given, only the current mkvp of each apqn is
- * compared. Likewise with the PKEY_FLAGS_MATCH_ALT_MKVP. If both are given, it
- * is assumed to return apqns where either the current or the alternate mkvp
- * matches. At least one of the matching flags needs to be given.
+ * key, CCA AES cipher key, CCA ECC private key, EP11 AES key, EP11 ECC private
+ * key) and tries to find all matching crypto cards based on the MKVP and maybe
+ * other criterias (like CCA AES cipher keys need a CEX5C or higher, EP11 keys
+ * with BLOB_PKEY_EXTRACTABLE need a CEX7 and EP11 api version 4). The list of
+ * APQNs is further filtered by the key's mkvp which needs to match to either
+ * the current mkvp (CCA and EP11) or the alternate mkvp (old mkvp, CCA adapters
+ * only) of the apqns. The flags argument may be used to limit the matching
+ * apqns. If the PKEY_FLAGS_MATCH_CUR_MKVP is given, only the current mkvp of
+ * each apqn is compared. Likewise with the PKEY_FLAGS_MATCH_ALT_MKVP. If both
+ * are given, it is assumed to return apqns where either the current or the
+ * alternate mkvp matches. At least one of the matching flags needs to be given.
* The flags argument for EP11 keys has no further action and is currently
* ignored (but needs to be given as PKEY_FLAGS_MATCH_CUR_MKVP) as there is only
* the wkvp from the key to match against the apqn's wkvp.
@@ -365,9 +385,10 @@ struct pkey_apqns4key {
* restrict the list by given master key verification patterns.
* For different key types there may be different ways to match the
* master key verification patterns. For CCA keys (CCA data key and CCA
- * cipher key) the first 8 bytes of cur_mkvp refer to the current mkvp value
- * of the apqn and the first 8 bytes of the alt_mkvp refer to the old mkvp.
- * The flags argument controls if the apqns current and/or alternate mkvp
+ * cipher key) the first 8 bytes of cur_mkvp refer to the current AES mkvp value
+ * of the apqn and the first 8 bytes of the alt_mkvp refer to the old AES mkvp.
+ * For CCA ECC keys it is similar but the match is against the APKA current/old
+ * mkvp. The flags argument controls if the apqns current and/or alternate mkvp
* should match. If the PKEY_FLAGS_MATCH_CUR_MKVP is given, only the current
* mkvp of each apqn is compared. Likewise with the PKEY_FLAGS_MATCH_ALT_MKVP.
* If both are given, it is assumed to return apqns where either the
@@ -397,4 +418,30 @@ struct pkey_apqns4keytype {
};
#define PKEY_APQNS4KT _IOWR(PKEY_IOCTL_MAGIC, 0x1C, struct pkey_apqns4keytype)
+/*
+ * Transform a key blob into a protected key, version 3.
+ * The difference to version 2 of this ioctl is that the protected key
+ * buffer is now explicitly and not within a struct pkey_protkey any more.
+ * So this ioctl is also able to handle EP11 and CCA ECC secure keys and
+ * provide ECC protected keys.
+ * There needs to be a list of apqns given with at least one entry in there.
+ * All apqns in the list need to be exact apqns, 0xFFFF as ANY card or domain
+ * is not supported. The implementation walks through the list of apqns and
+ * tries to send the request to each apqn without any further checking (like
+ * card type or online state). If the apqn fails, simple the next one in the
+ * list is tried until success (return 0) or the end of the list is reached
+ * (return -1 with errno ENODEV). You may use the PKEY_APQNS4K ioctl to
+ * generate a list of apqns based on the key.
+ */
+struct pkey_kblob2pkey3 {
+ __u8 __user *key; /* in: pointer to key blob */
+ __u32 keylen; /* in: key blob size */
+ struct pkey_apqn __user *apqns; /* in: ptr to list of apqn targets */
+ __u32 apqn_entries; /* in: # of apqn target list entries */
+ __u32 pkeytype; /* out: prot key type (enum pkey_key_type) */
+ __u32 pkeylen; /* in/out: size of pkey buffer/actual len of pkey */
+ __u8 __user *pkey; /* in: pkey blob buffer space ptr */
+};
+#define PKEY_KBLOB2PROTK3 _IOWR(PKEY_IOCTL_MAGIC, 0x1D, struct pkey_kblob2pkey3)
+
#endif /* _UAPI_PKEY_H */
diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h
index 6ca1e68d7103..ede318653c87 100644
--- a/arch/s390/include/uapi/asm/sie.h
+++ b/arch/s390/include/uapi/asm/sie.h
@@ -29,7 +29,7 @@
{ 0x13, "SIGP conditional emergency signal" }, \
{ 0x15, "SIGP sense running" }, \
{ 0x16, "SIGP set multithreading"}, \
- { 0x17, "SIGP store additional status ait address"}
+ { 0x17, "SIGP store additional status at address"}
#define icpt_prog_codes \
{ 0x0001, "Prog Operation" }, \
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index efca70970761..dd73b7f07423 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -57,6 +57,7 @@ obj-$(CONFIG_COMPAT) += $(compat-obj-y)
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_KPROBES) += kprobes_insn_page.o
obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_UPROBES) += uprobes.o
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 5d8cc1864566..ece58f2217cb 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -59,26 +59,6 @@ int main(void)
OFFSET(__SF_SIE_REASON, stack_frame, empty1[2]);
OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[3]);
BLANK();
- /* timeval/timezone offsets for use by vdso */
- OFFSET(__VDSO_UPD_COUNT, vdso_data, tb_update_count);
- OFFSET(__VDSO_XTIME_STAMP, vdso_data, xtime_tod_stamp);
- OFFSET(__VDSO_XTIME_SEC, vdso_data, xtime_clock_sec);
- OFFSET(__VDSO_XTIME_NSEC, vdso_data, xtime_clock_nsec);
- OFFSET(__VDSO_XTIME_CRS_SEC, vdso_data, xtime_coarse_sec);
- OFFSET(__VDSO_XTIME_CRS_NSEC, vdso_data, xtime_coarse_nsec);
- OFFSET(__VDSO_WTOM_SEC, vdso_data, wtom_clock_sec);
- OFFSET(__VDSO_WTOM_NSEC, vdso_data, wtom_clock_nsec);
- OFFSET(__VDSO_WTOM_CRS_SEC, vdso_data, wtom_coarse_sec);
- OFFSET(__VDSO_WTOM_CRS_NSEC, vdso_data, wtom_coarse_nsec);
- OFFSET(__VDSO_TIMEZONE, vdso_data, tz_minuteswest);
- OFFSET(__VDSO_ECTG_OK, vdso_data, ectg_available);
- OFFSET(__VDSO_TK_MULT, vdso_data, tk_mult);
- OFFSET(__VDSO_TK_SHIFT, vdso_data, tk_shift);
- OFFSET(__VDSO_TS_DIR, vdso_data, ts_dir);
- OFFSET(__VDSO_TS_END, vdso_data, ts_end);
- OFFSET(__VDSO_CLOCK_REALTIME_RES, vdso_data, hrtimer_res);
- OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base);
- OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time);
OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val);
BLANK();
/* constants used by the vdso */
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index c42ce348103c..205b2e2648aa 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -141,7 +141,7 @@ int copy_oldmem_kernel(void *dst, void *src, size_t count)
while (count) {
from = __pa(src);
if (!OLDMEM_BASE && from < sclp.hsa_size) {
- /* Copy from zfcpdump HSA area */
+ /* Copy from zfcp/nvme dump HSA area */
len = min(count, sclp.hsa_size - from);
rc = memcpy_hsa_kernel(dst, from, len);
if (rc)
@@ -184,7 +184,7 @@ static int copy_oldmem_user(void __user *dst, void *src, size_t count)
while (count) {
from = __pa(src);
if (!OLDMEM_BASE && from < sclp.hsa_size) {
- /* Copy from zfcpdump HSA area */
+ /* Copy from zfcp/nvme dump HSA area */
len = min(count, sclp.hsa_size - from);
rc = memcpy_hsa_user(dst, from, len);
if (rc)
@@ -258,7 +258,7 @@ static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma,
}
/*
- * Remap "oldmem" for zfcpdump
+ * Remap "oldmem" for zfcp/nvme dump
*
* We only map available memory above HSA size. Memory below HSA size
* is read on demand using the copy_oldmem_page() function.
@@ -283,7 +283,7 @@ static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma,
}
/*
- * Remap "oldmem" for kdump or zfcpdump
+ * Remap "oldmem" for kdump or zfcp/nvme dump
*/
int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from,
unsigned long pfn, unsigned long size, pgprot_t prot)
@@ -632,11 +632,11 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
u32 alloc_size;
u64 hdr_off;
- /* If we are not in kdump or zfcpdump mode return */
- if (!OLDMEM_BASE && ipl_info.type != IPL_TYPE_FCP_DUMP)
+ /* If we are not in kdump or zfcp/nvme dump mode return */
+ if (!OLDMEM_BASE && !is_ipl_type_dump())
return 0;
- /* If we cannot get HSA size for zfcpdump return error */
- if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp.hsa_size)
+ /* If we cannot get HSA size for zfcp/nvme dump return error */
+ if (is_ipl_type_dump() && !sclp.hsa_size)
return -ENODEV;
/* For kdump, exclude previous crashkernel memory */
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index ccba63aaeb47..b8b0cd7b008f 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -104,18 +104,7 @@ static const struct seq_operations show_diag_stat_sops = {
.show = show_diag_stat,
};
-static int show_diag_stat_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &show_diag_stat_sops);
-}
-
-static const struct file_operations show_diag_stat_fops = {
- .open = show_diag_stat_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
+DEFINE_SEQ_ATTRIBUTE(show_diag_stat);
static int __init show_diag_stat_init(void)
{
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index f304802ecf7b..a7eab7be4db0 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -482,31 +482,37 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
return (int) (ptr - buffer);
}
+static int copy_from_regs(struct pt_regs *regs, void *dst, void *src, int len)
+{
+ if (user_mode(regs)) {
+ if (copy_from_user(dst, (char __user *)src, len))
+ return -EFAULT;
+ } else {
+ if (copy_from_kernel_nofault(dst, src, len))
+ return -EFAULT;
+ }
+ return 0;
+}
+
void show_code(struct pt_regs *regs)
{
char *mode = user_mode(regs) ? "User" : "Krnl";
unsigned char code[64];
char buffer[128], *ptr;
- mm_segment_t old_fs;
unsigned long addr;
int start, end, opsize, hops, i;
/* Get a snapshot of the 64 bytes surrounding the fault address. */
- old_fs = get_fs();
- set_fs(user_mode(regs) ? USER_DS : KERNEL_DS);
for (start = 32; start && regs->psw.addr >= 34 - start; start -= 2) {
addr = regs->psw.addr - 34 + start;
- if (__copy_from_user(code + start - 2,
- (char __user *) addr, 2))
+ if (copy_from_regs(regs, code + start - 2, (void *)addr, 2))
break;
}
for (end = 32; end < 64; end += 2) {
addr = regs->psw.addr + end - 32;
- if (__copy_from_user(code + end,
- (char __user *) addr, 2))
+ if (copy_from_regs(regs, code + end, (void *)addr, 2))
break;
}
- set_fs(old_fs);
/* Code snapshot useable ? */
if ((regs->psw.addr & 1) || start >= end) {
printk("%s Code: Bad PSW.\n", mode);
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 078277231858..705844f73934 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -274,19 +274,6 @@ static int __init disable_vector_extension(char *str)
}
early_param("novx", disable_vector_extension);
-static int __init cad_setup(char *str)
-{
- bool enabled;
- int rc;
-
- rc = kstrtobool(str, &enabled);
- if (!rc && enabled && test_facility(128))
- /* Enable problem state CAD. */
- __ctl_set_bit(2, 3);
- return rc;
-}
-early_param("cad", cad_setup);
-
char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
static void __init setup_boot_command_line(void)
{
diff --git a/arch/s390/kernel/early_printk.c b/arch/s390/kernel/early_printk.c
index 6f24d83bc5dc..d9d53f44008a 100644
--- a/arch/s390/kernel/early_printk.c
+++ b/arch/s390/kernel/early_printk.c
@@ -10,7 +10,7 @@
static void sclp_early_write(struct console *con, const char *s, unsigned int len)
{
- __sclp_early_printk(s, len, 0);
+ __sclp_early_printk(s, len);
}
static struct console sclp_early_console = {
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 23edf196d3dc..86235919c2d1 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -435,10 +435,8 @@ ENTRY(system_call)
jz .Lsysc_skip_fpu
brasl %r14,load_fpu_regs
.Lsysc_skip_fpu:
- lg %r14,__LC_VDSO_PER_CPU
mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
stpt __LC_EXIT_TIMER
- mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
lmg %r0,%r15,__PT_R0(%r11)
b __LC_RETURN_LPSWE
@@ -797,13 +795,11 @@ ENTRY(io_int_handler)
TRACE_IRQS_ON
0:
#endif
- lg %r14,__LC_VDSO_PER_CPU
mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
tm __PT_PSW+1(%r11),0x01 # returning to user ?
jno .Lio_exit_kernel
BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
stpt __LC_EXIT_TIMER
- mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
.Lio_exit_kernel:
lmg %r0,%r15,__PT_R0(%r11)
b __LC_RETURN_LPSWE
@@ -1213,14 +1209,12 @@ ENTRY(mcck_int_handler)
brasl %r14,s390_handle_mcck
TRACE_IRQS_ON
.Lmcck_return:
- lg %r14,__LC_VDSO_PER_CPU
lmg %r0,%r10,__PT_R0(%r11)
mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
jno 0f
BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
stpt __LC_EXIT_TIMER
- mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
0: lmg %r11,%r15,__PT_R11(%r11)
b __LC_RETURN_MCCK_LPSWE
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index a44ddc2f2dec..d2ca3fe51f8e 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -9,7 +9,6 @@
#include <asm/idle.h>
extern void *restart_stack;
-extern unsigned long suspend_zero_pages;
void system_call(void);
void pgm_check_handler(void);
@@ -17,7 +16,6 @@ void ext_int_handler(void);
void io_int_handler(void);
void mcck_int_handler(void);
void restart_int_handler(void);
-void restart_call_handler(void);
asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
@@ -62,12 +60,10 @@ void do_notify_resume(struct pt_regs *regs);
void __init init_IRQ(void);
void do_IRQ(struct pt_regs *regs, int irq);
void do_restart(void);
-void __init startup_init_nobss(void);
void __init startup_init(void);
void die(struct pt_regs *regs, const char *str);
int setup_profiling_timer(unsigned int multiplier);
void __init time_init(void);
-void s390_early_resume(void);
unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip);
struct s390_mmap_arg_struct;
@@ -92,4 +88,6 @@ void set_fs_fixup(void);
unsigned long stack_alloc(void);
void stack_free(unsigned long stack);
+extern char kprobes_insn_page[];
+
#endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 90a2a17239b0..98b3aca1de8e 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -40,10 +40,12 @@
#define IPL_FCP_STR "fcp"
#define IPL_FCP_DUMP_STR "fcp_dump"
#define IPL_NVME_STR "nvme"
+#define IPL_NVME_DUMP_STR "nvme_dump"
#define IPL_NSS_STR "nss"
#define DUMP_CCW_STR "ccw"
#define DUMP_FCP_STR "fcp"
+#define DUMP_NVME_STR "nvme"
#define DUMP_NONE_STR "none"
/*
@@ -96,6 +98,8 @@ static char *ipl_type_str(enum ipl_type type)
return IPL_NSS_STR;
case IPL_TYPE_NVME:
return IPL_NVME_STR;
+ case IPL_TYPE_NVME_DUMP:
+ return IPL_NVME_DUMP_STR;
case IPL_TYPE_UNKNOWN:
default:
return IPL_UNKNOWN_STR;
@@ -106,6 +110,7 @@ enum dump_type {
DUMP_TYPE_NONE = 1,
DUMP_TYPE_CCW = 2,
DUMP_TYPE_FCP = 4,
+ DUMP_TYPE_NVME = 8,
};
static char *dump_type_str(enum dump_type type)
@@ -117,6 +122,8 @@ static char *dump_type_str(enum dump_type type)
return DUMP_CCW_STR;
case DUMP_TYPE_FCP:
return DUMP_FCP_STR;
+ case DUMP_TYPE_NVME:
+ return DUMP_NVME_STR;
default:
return NULL;
}
@@ -144,10 +151,12 @@ static struct ipl_parameter_block *reipl_block_actual;
static int dump_capabilities = DUMP_TYPE_NONE;
static enum dump_type dump_type = DUMP_TYPE_NONE;
static struct ipl_parameter_block *dump_block_fcp;
+static struct ipl_parameter_block *dump_block_nvme;
static struct ipl_parameter_block *dump_block_ccw;
static struct sclp_ipl_info sclp_ipl_info;
+static bool reipl_nvme_clear;
static bool reipl_fcp_clear;
static bool reipl_ccw_clear;
@@ -266,7 +275,10 @@ static __init enum ipl_type get_ipl_type(void)
else
return IPL_TYPE_FCP;
case IPL_PBT_NVME:
- return IPL_TYPE_NVME;
+ if (ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP)
+ return IPL_TYPE_NVME_DUMP;
+ else
+ return IPL_TYPE_NVME;
}
return IPL_TYPE_UNKNOWN;
}
@@ -324,6 +336,7 @@ static ssize_t sys_ipl_device_show(struct kobject *kobj,
case IPL_TYPE_FCP_DUMP:
return sprintf(page, "0.0.%04x\n", ipl_block.fcp.devno);
case IPL_TYPE_NVME:
+ case IPL_TYPE_NVME_DUMP:
return sprintf(page, "%08ux\n", ipl_block.nvme.fid);
default:
return 0;
@@ -531,6 +544,7 @@ static int __init ipl_init(void)
rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group);
break;
case IPL_TYPE_NVME:
+ case IPL_TYPE_NVME_DUMP:
rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nvme_attr_group);
break;
default:
@@ -873,6 +887,24 @@ static struct attribute_group reipl_nvme_attr_group = {
.bin_attrs = reipl_nvme_bin_attrs
};
+static ssize_t reipl_nvme_clear_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sprintf(page, "%u\n", reipl_nvme_clear);
+}
+
+static ssize_t reipl_nvme_clear_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ if (strtobool(buf, &reipl_nvme_clear) < 0)
+ return -EINVAL;
+ return len;
+}
+
+static struct kobj_attribute sys_reipl_nvme_clear_attr =
+ __ATTR(clear, 0644, reipl_nvme_clear_show, reipl_nvme_clear_store);
+
/* CCW reipl device attributes */
DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw);
@@ -1099,7 +1131,10 @@ static void __reipl_run(void *unused)
break;
case IPL_TYPE_NVME:
diag308(DIAG308_SET, reipl_block_nvme);
- diag308(DIAG308_LOAD_CLEAR, NULL);
+ if (reipl_nvme_clear)
+ diag308(DIAG308_LOAD_CLEAR, NULL);
+ else
+ diag308(DIAG308_LOAD_NORMAL, NULL);
break;
case IPL_TYPE_NSS:
diag308(DIAG308_SET, reipl_block_nss);
@@ -1109,6 +1144,7 @@ static void __reipl_run(void *unused)
diag308(DIAG308_LOAD_CLEAR, NULL);
break;
case IPL_TYPE_FCP_DUMP:
+ case IPL_TYPE_NVME_DUMP:
break;
}
disabled_wait();
@@ -1219,8 +1255,9 @@ static int __init reipl_fcp_init(void)
&sys_reipl_fcp_clear_attr.attr);
if (rc)
goto out2;
- } else
+ } else {
reipl_fcp_clear = true;
+ }
if (ipl_info.type == IPL_TYPE_FCP) {
memcpy(reipl_block_fcp, &ipl_block, sizeof(ipl_block));
@@ -1266,10 +1303,16 @@ static int __init reipl_nvme_init(void)
}
rc = sysfs_create_group(&reipl_nvme_kset->kobj, &reipl_nvme_attr_group);
- if (rc) {
- kset_unregister(reipl_nvme_kset);
- free_page((unsigned long) reipl_block_nvme);
- return rc;
+ if (rc)
+ goto out1;
+
+ if (test_facility(141)) {
+ rc = sysfs_create_file(&reipl_nvme_kset->kobj,
+ &sys_reipl_nvme_clear_attr.attr);
+ if (rc)
+ goto out2;
+ } else {
+ reipl_nvme_clear = true;
}
if (ipl_info.type == IPL_TYPE_NVME) {
@@ -1290,6 +1333,13 @@ static int __init reipl_nvme_init(void)
}
reipl_capabilities |= IPL_TYPE_NVME;
return 0;
+
+out2:
+ sysfs_remove_group(&reipl_nvme_kset->kobj, &reipl_nvme_attr_group);
+out1:
+ kset_unregister(reipl_nvme_kset);
+ free_page((unsigned long) reipl_block_nvme);
+ return rc;
}
static int __init reipl_type_init(void)
@@ -1382,6 +1432,29 @@ static struct attribute_group dump_fcp_attr_group = {
.attrs = dump_fcp_attrs,
};
+/* NVME dump device attributes */
+DEFINE_IPL_ATTR_RW(dump_nvme, fid, "0x%08llx\n", "%llx\n",
+ dump_block_nvme->nvme.fid);
+DEFINE_IPL_ATTR_RW(dump_nvme, nsid, "0x%08llx\n", "%llx\n",
+ dump_block_nvme->nvme.nsid);
+DEFINE_IPL_ATTR_RW(dump_nvme, bootprog, "%lld\n", "%llx\n",
+ dump_block_nvme->nvme.bootprog);
+DEFINE_IPL_ATTR_RW(dump_nvme, br_lba, "%lld\n", "%llx\n",
+ dump_block_nvme->nvme.br_lba);
+
+static struct attribute *dump_nvme_attrs[] = {
+ &sys_dump_nvme_fid_attr.attr,
+ &sys_dump_nvme_nsid_attr.attr,
+ &sys_dump_nvme_bootprog_attr.attr,
+ &sys_dump_nvme_br_lba_attr.attr,
+ NULL,
+};
+
+static struct attribute_group dump_nvme_attr_group = {
+ .name = IPL_NVME_STR,
+ .attrs = dump_nvme_attrs,
+};
+
/* CCW dump device attributes */
DEFINE_IPL_CCW_ATTR_RW(dump_ccw, device, dump_block_ccw->ccw);
@@ -1423,6 +1496,8 @@ static ssize_t dump_type_store(struct kobject *kobj,
rc = dump_set_type(DUMP_TYPE_CCW);
else if (strncmp(buf, DUMP_FCP_STR, strlen(DUMP_FCP_STR)) == 0)
rc = dump_set_type(DUMP_TYPE_FCP);
+ else if (strncmp(buf, DUMP_NVME_STR, strlen(DUMP_NVME_STR)) == 0)
+ rc = dump_set_type(DUMP_TYPE_NVME);
return (rc != 0) ? rc : len;
}
@@ -1450,6 +1525,9 @@ static void __dump_run(void *unused)
case DUMP_TYPE_FCP:
diag308_dump(dump_block_fcp);
break;
+ case DUMP_TYPE_NVME:
+ diag308_dump(dump_block_nvme);
+ break;
default:
break;
}
@@ -1506,6 +1584,29 @@ static int __init dump_fcp_init(void)
return 0;
}
+static int __init dump_nvme_init(void)
+{
+ int rc;
+
+ if (!sclp_ipl_info.has_dump)
+ return 0; /* LDIPL DUMP is not installed */
+ dump_block_nvme = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!dump_block_nvme)
+ return -ENOMEM;
+ rc = sysfs_create_group(&dump_kset->kobj, &dump_nvme_attr_group);
+ if (rc) {
+ free_page((unsigned long)dump_block_nvme);
+ return rc;
+ }
+ dump_block_nvme->hdr.len = IPL_BP_NVME_LEN;
+ dump_block_nvme->hdr.version = IPL_PARM_BLOCK_VERSION;
+ dump_block_nvme->fcp.len = IPL_BP0_NVME_LEN;
+ dump_block_nvme->fcp.pbt = IPL_PBT_NVME;
+ dump_block_nvme->fcp.opt = IPL_PB0_NVME_OPT_DUMP;
+ dump_capabilities |= DUMP_TYPE_NVME;
+ return 0;
+}
+
static int __init dump_init(void)
{
int rc;
@@ -1524,6 +1625,9 @@ static int __init dump_init(void)
rc = dump_fcp_init();
if (rc)
return rc;
+ rc = dump_nvme_init();
+ if (rc)
+ return rc;
dump_set_type(DUMP_TYPE_NONE);
return 0;
}
@@ -1956,6 +2060,7 @@ void __init setup_ipl(void)
ipl_info.data.fcp.lun = ipl_block.fcp.lun;
break;
case IPL_TYPE_NVME:
+ case IPL_TYPE_NVME_DUMP:
ipl_info.data.nvme.fid = ipl_block.nvme.fid;
ipl_info.data.nvme.nsid = ipl_block.nvme.nsid;
break;
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index fc30e799bd84..aae24dc75df6 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -7,6 +7,7 @@
* s390 port, used ppc64 as template. Mike Grundy <grundym@us.ibm.com>
*/
+#include <linux/moduleloader.h>
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
@@ -21,6 +22,7 @@
#include <asm/set_memory.h>
#include <asm/sections.h>
#include <asm/dis.h>
+#include "entry.h"
DEFINE_PER_CPU(struct kprobe *, current_kprobe);
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
@@ -30,19 +32,32 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = { };
DEFINE_INSN_CACHE_OPS(s390_insn);
static int insn_page_in_use;
-static char insn_page[PAGE_SIZE] __aligned(PAGE_SIZE);
+
+void *alloc_insn_page(void)
+{
+ void *page;
+
+ page = module_alloc(PAGE_SIZE);
+ if (!page)
+ return NULL;
+ __set_memory((unsigned long) page, 1, SET_MEMORY_RO | SET_MEMORY_X);
+ return page;
+}
+
+void free_insn_page(void *page)
+{
+ module_memfree(page);
+}
static void *alloc_s390_insn_page(void)
{
if (xchg(&insn_page_in_use, 1) == 1)
return NULL;
- set_memory_x((unsigned long) &insn_page, 1);
- return &insn_page;
+ return &kprobes_insn_page;
}
static void free_s390_insn_page(void *page)
{
- set_memory_nx((unsigned long) page, 1);
xchg(&insn_page_in_use, 0);
}
@@ -56,25 +71,29 @@ struct kprobe_insn_cache kprobe_s390_insn_slots = {
static void copy_instruction(struct kprobe *p)
{
+ kprobe_opcode_t insn[MAX_INSN_SIZE];
s64 disp, new_disp;
u64 addr, new_addr;
+ unsigned int len;
- memcpy(p->ainsn.insn, p->addr, insn_length(*p->addr >> 8));
- p->opcode = p->ainsn.insn[0];
- if (!probe_is_insn_relative_long(p->ainsn.insn))
- return;
- /*
- * For pc-relative instructions in RIL-b or RIL-c format patch the
- * RI2 displacement field. We have already made sure that the insn
- * slot for the patched instruction is within the same 2GB area
- * as the original instruction (either kernel image or module area).
- * Therefore the new displacement will always fit.
- */
- disp = *(s32 *)&p->ainsn.insn[1];
- addr = (u64)(unsigned long)p->addr;
- new_addr = (u64)(unsigned long)p->ainsn.insn;
- new_disp = ((addr + (disp * 2)) - new_addr) / 2;
- *(s32 *)&p->ainsn.insn[1] = new_disp;
+ len = insn_length(*p->addr >> 8);
+ memcpy(&insn, p->addr, len);
+ p->opcode = insn[0];
+ if (probe_is_insn_relative_long(&insn[0])) {
+ /*
+ * For pc-relative instructions in RIL-b or RIL-c format patch
+ * the RI2 displacement field. We have already made sure that
+ * the insn slot for the patched instruction is within the same
+ * 2GB area as the original instruction (either kernel image or
+ * module area). Therefore the new displacement will always fit.
+ */
+ disp = *(s32 *)&insn[1];
+ addr = (u64)(unsigned long)p->addr;
+ new_addr = (u64)(unsigned long)p->ainsn.insn;
+ new_disp = ((addr + (disp * 2)) - new_addr) / 2;
+ *(s32 *)&insn[1] = new_disp;
+ }
+ s390_kernel_write(p->ainsn.insn, &insn, len);
}
NOKPROBE_SYMBOL(copy_instruction);
diff --git a/arch/s390/kernel/kprobes_insn_page.S b/arch/s390/kernel/kprobes_insn_page.S
new file mode 100644
index 000000000000..f6cb022ef8c8
--- /dev/null
+++ b/arch/s390/kernel/kprobes_insn_page.S
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/linkage.h>
+
+/*
+ * insn_page is a special 4k aligned dummy function for kprobes.
+ * It will contain all kprobed instructions that are out-of-line executed.
+ * The page must be within the kernel image to guarantee that the
+ * out-of-line instructions are within 2GB distance of their original
+ * location. Using a dummy function ensures that the insn_page is within
+ * the text section of the kernel and mapped read-only/executable from
+ * the beginning on, thus avoiding to split large mappings if the page
+ * would be in the data section instead.
+ */
+ .section .kprobes.text, "ax"
+ .align 4096
+ENTRY(kprobes_insn_page)
+ .rept 2048
+ .word 0x07fe
+ .endr
+ENDPROC(kprobes_insn_page)
+ .previous
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index c64a95ae830f..4d843e64496f 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -102,7 +102,6 @@ struct mem_detect_info __bootdata(mem_detect);
struct exception_table_entry *__bootdata_preserved(__start_dma_ex_table);
struct exception_table_entry *__bootdata_preserved(__stop_dma_ex_table);
-unsigned long __bootdata_preserved(__swsusp_reset_dma);
unsigned long __bootdata_preserved(__stext_dma);
unsigned long __bootdata_preserved(__etext_dma);
unsigned long __bootdata_preserved(__sdma);
@@ -119,6 +118,7 @@ EXPORT_SYMBOL(VMALLOC_END);
struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);
+unsigned long vmemmap_size;
unsigned long MODULES_VADDR;
unsigned long MODULES_END;
@@ -128,6 +128,12 @@ struct lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);
/*
+ * The Write Back bit position in the physaddr is given by the SLPC PCI.
+ * Leaving the mask zero always uses write through which is safe
+ */
+unsigned long mio_wb_bit_mask __ro_after_init;
+
+/*
* This is set up by the setup-routine at boot-time
* for S390 need to find out, what we have to setup
* using address 0x10400 ...
@@ -245,7 +251,7 @@ static void __init conmode_default(void)
#ifdef CONFIG_CRASH_DUMP
static void __init setup_zfcpdump(void)
{
- if (ipl_info.type != IPL_TYPE_FCP_DUMP)
+ if (!is_ipl_type_dump())
return;
if (OLDMEM_BASE)
return;
@@ -300,7 +306,7 @@ void machine_power_off(void)
void (*pm_power_off)(void) = machine_power_off;
EXPORT_SYMBOL_GPL(pm_power_off);
-void *restart_stack __section(.data);
+void *restart_stack;
unsigned long stack_alloc(void)
{
@@ -366,8 +372,12 @@ void __init arch_call_rest_init(void)
static void __init setup_lowcore_dat_off(void)
{
+ unsigned long int_psw_mask = PSW_KERNEL_BITS;
struct lowcore *lc;
+ if (IS_ENABLED(CONFIG_KASAN))
+ int_psw_mask |= PSW_MASK_DAT;
+
/*
* Setup lowcore for boot cpu
*/
@@ -379,15 +389,15 @@ static void __init setup_lowcore_dat_off(void)
lc->restart_psw.mask = PSW_KERNEL_BITS;
lc->restart_psw.addr = (unsigned long) restart_int_handler;
- lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
+ lc->external_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
lc->external_new_psw.addr = (unsigned long) ext_int_handler;
- lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
+ lc->svc_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
lc->svc_new_psw.addr = (unsigned long) system_call;
- lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
+ lc->program_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
- lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
+ lc->io_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
lc->io_new_psw.addr = (unsigned long) io_int_handler;
lc->clock_comparator = clock_comparator_max;
lc->nodat_stack = ((unsigned long) &init_thread_union)
@@ -402,7 +412,6 @@ static void __init setup_lowcore_dat_off(void)
memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list,
sizeof(lc->alt_stfle_fac_list));
nmi_alloc_boot_cpu(lc);
- vdso_alloc_boot_cpu(lc);
lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
lc->async_enter_timer = S390_lowcore.async_enter_timer;
lc->exit_timer = S390_lowcore.exit_timer;
@@ -552,22 +561,17 @@ static void __init setup_memory_end(void)
unsigned long vmax, tmp;
/* Choose kernel address space layout: 3 or 4 levels. */
- if (IS_ENABLED(CONFIG_KASAN)) {
- vmax = IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING)
- ? _REGION1_SIZE
- : _REGION2_SIZE;
- } else {
- tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE;
- tmp = tmp * (sizeof(struct page) + PAGE_SIZE);
- if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE)
- vmax = _REGION2_SIZE; /* 3-level kernel page table */
- else
- vmax = _REGION1_SIZE; /* 4-level kernel page table */
- }
-
+ tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE;
+ tmp = tmp * (sizeof(struct page) + PAGE_SIZE);
+ if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE)
+ vmax = _REGION2_SIZE; /* 3-level kernel page table */
+ else
+ vmax = _REGION1_SIZE; /* 4-level kernel page table */
if (is_prot_virt_host())
adjust_to_uv_max(&vmax);
-
+#ifdef CONFIG_KASAN
+ vmax = kasan_vmax;
+#endif
/* module area is at the end of the kernel address space. */
MODULES_END = vmax;
MODULES_VADDR = MODULES_END - MODULES_LEN;
@@ -586,9 +590,14 @@ static void __init setup_memory_end(void)
/* Take care that memory_end is set and <= vmemmap */
memory_end = min(memory_end ?: max_physmem_end, (unsigned long)vmemmap);
#ifdef CONFIG_KASAN
- /* fit in kasan shadow memory region between 1:1 and vmemmap */
memory_end = min(memory_end, KASAN_SHADOW_START);
- vmemmap = max(vmemmap, (struct page *)KASAN_SHADOW_END);
+#endif
+ vmemmap_size = SECTION_ALIGN_UP(memory_end / PAGE_SIZE) * sizeof(struct page);
+#ifdef CONFIG_KASAN
+ /* move vmemmap above kasan shadow only if stands in a way */
+ if (KASAN_SHADOW_END > (unsigned long)vmemmap &&
+ (unsigned long)vmemmap + vmemmap_size > KASAN_SHADOW_START)
+ vmemmap = max(vmemmap, (struct page *)KASAN_SHADOW_END);
#endif
max_pfn = max_low_pfn = PFN_DOWN(memory_end);
memblock_remove(memory_end, ULONG_MAX);
@@ -1133,8 +1142,7 @@ void __init setup_arch(char **cmdline_p)
free_mem_detect_info();
remove_oldmem();
- if (is_prot_virt_host())
- setup_uv();
+ setup_uv();
setup_memory_end();
setup_memory();
dma_contiguous_reserve(memory_end);
@@ -1178,7 +1186,7 @@ void __init setup_arch(char **cmdline_p)
if (IS_ENABLED(CONFIG_EXPOLINE))
nospec_init_branches();
- /* Setup zfcpdump support */
+ /* Setup zfcp/nvme dump support */
setup_zfcpdump();
/* Add system specific data to the random pool */
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 85700bd85f98..ebfe86d097f0 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -606,14 +606,14 @@ int smp_store_status(int cpu)
/*
* Collect CPU state of the previous, crashed system.
* There are four cases:
- * 1) standard zfcp dump
- * condition: OLDMEM_BASE == NULL && ipl_info.type == IPL_TYPE_FCP_DUMP
+ * 1) standard zfcp/nvme dump
+ * condition: OLDMEM_BASE == NULL && is_ipl_type_dump() == true
* The state for all CPUs except the boot CPU needs to be collected
* with sigp stop-and-store-status. The boot CPU state is located in
* the absolute lowcore of the memory stored in the HSA. The zcore code
* will copy the boot CPU state from the HSA.
- * 2) stand-alone kdump for SCSI (zfcp dump with swapped memory)
- * condition: OLDMEM_BASE != NULL && ipl_info.type == IPL_TYPE_FCP_DUMP
+ * 2) stand-alone kdump for SCSI/NVMe (zfcp/nvme dump with swapped memory)
+ * condition: OLDMEM_BASE != NULL && is_ipl_type_dump() == true
* The state for all CPUs except the boot CPU needs to be collected
* with sigp stop-and-store-status. The firmware or the boot-loader
* stored the registers of the boot CPU in the absolute lowcore in the
@@ -660,7 +660,7 @@ void __init smp_save_dump_cpus(void)
unsigned long page;
bool is_boot_cpu;
- if (!(OLDMEM_BASE || ipl_info.type == IPL_TYPE_FCP_DUMP))
+ if (!(OLDMEM_BASE || is_ipl_type_dump()))
/* No previous system present, normal boot. */
return;
/* Allocate a page as dumping area for the store status sigps */
@@ -686,7 +686,7 @@ void __init smp_save_dump_cpus(void)
/* Get the vector registers */
smp_save_cpu_vxrs(sa, addr, is_boot_cpu, page);
/*
- * For a zfcp dump OLDMEM_BASE == NULL and the registers
+ * For a zfcp/nvme dump OLDMEM_BASE == NULL and the registers
* of the boot CPU are stored in the HSA. To retrieve
* these registers an SCLP request is required which is
* done by drivers/s390/char/zcore.c:init_cpu_info()
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 1c3b48165e86..28c168000483 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -442,3 +442,4 @@
437 common openat2 sys_openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2 sys_faccessat2
+440 common process_madvise sys_process_madvise sys_process_madvise
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 513e59d08a55..0ac30ee2c633 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -41,6 +41,9 @@
#include <linux/gfp.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
+#include <vdso/vsyscall.h>
+#include <vdso/clocksource.h>
+#include <vdso/helpers.h>
#include <asm/facility.h>
#include <asm/delay.h>
#include <asm/div64.h>
@@ -84,7 +87,7 @@ void __init time_early_init(void)
/* Initialize TOD steering parameters */
tod_steering_end = *(unsigned long long *) &tod_clock_base[1];
- vdso_data->ts_end = tod_steering_end;
+ vdso_data->arch_data.tod_steering_end = tod_steering_end;
if (!test_facility(28))
return;
@@ -257,6 +260,7 @@ static struct clocksource clocksource_tod = {
.mult = 1000,
.shift = 12,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .vdso_clock_mode = VDSO_CLOCKMODE_TOD,
};
struct clocksource * __init clocksource_default_clock(void)
@@ -264,56 +268,6 @@ struct clocksource * __init clocksource_default_clock(void)
return &clocksource_tod;
}
-void update_vsyscall(struct timekeeper *tk)
-{
- u64 nsecps;
-
- if (tk->tkr_mono.clock != &clocksource_tod)
- return;
-
- /* Make userspace gettimeofday spin until we're done. */
- ++vdso_data->tb_update_count;
- smp_wmb();
- vdso_data->xtime_tod_stamp = tk->tkr_mono.cycle_last;
- vdso_data->xtime_clock_sec = tk->xtime_sec;
- vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
- vdso_data->wtom_clock_sec =
- tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
- vdso_data->wtom_clock_nsec = tk->tkr_mono.xtime_nsec +
- + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
- nsecps = (u64) NSEC_PER_SEC << tk->tkr_mono.shift;
- while (vdso_data->wtom_clock_nsec >= nsecps) {
- vdso_data->wtom_clock_nsec -= nsecps;
- vdso_data->wtom_clock_sec++;
- }
-
- vdso_data->xtime_coarse_sec = tk->xtime_sec;
- vdso_data->xtime_coarse_nsec =
- (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
- vdso_data->wtom_coarse_sec =
- vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec;
- vdso_data->wtom_coarse_nsec =
- vdso_data->xtime_coarse_nsec + tk->wall_to_monotonic.tv_nsec;
- while (vdso_data->wtom_coarse_nsec >= NSEC_PER_SEC) {
- vdso_data->wtom_coarse_nsec -= NSEC_PER_SEC;
- vdso_data->wtom_coarse_sec++;
- }
-
- vdso_data->tk_mult = tk->tkr_mono.mult;
- vdso_data->tk_shift = tk->tkr_mono.shift;
- vdso_data->hrtimer_res = hrtimer_resolution;
- smp_wmb();
- ++vdso_data->tb_update_count;
-}
-
-extern struct timezone sys_tz;
-
-void update_vsyscall_tz(void)
-{
- vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
- vdso_data->tz_dsttime = sys_tz.tz_dsttime;
-}
-
/*
* Initialize the TOD clock and the CPU timer of
* the boot cpu.
@@ -342,11 +296,12 @@ void __init time_init(void)
}
static DEFINE_PER_CPU(atomic_t, clock_sync_word);
-static DEFINE_MUTEX(clock_sync_mutex);
+static DEFINE_MUTEX(stp_mutex);
static unsigned long clock_sync_flags;
-#define CLOCK_SYNC_HAS_STP 0
-#define CLOCK_SYNC_STP 1
+#define CLOCK_SYNC_HAS_STP 0
+#define CLOCK_SYNC_STP 1
+#define CLOCK_SYNC_STPINFO_VALID 2
/*
* The get_clock function for the physical clock. It will get the current
@@ -431,7 +386,6 @@ static void clock_sync_global(unsigned long long delta)
/* Epoch overflow */
tod_clock_base[0]++;
/* Adjust TOD steering parameters. */
- vdso_data->tb_update_count++;
now = get_tod_clock();
adj = tod_steering_end - now;
if (unlikely((s64) adj >= 0))
@@ -443,9 +397,8 @@ static void clock_sync_global(unsigned long long delta)
panic("TOD clock sync offset %lli is too large to drift\n",
tod_steering_delta);
tod_steering_end = now + (abs(tod_steering_delta) << 15);
- vdso_data->ts_dir = (tod_steering_delta < 0) ? 0 : 1;
- vdso_data->ts_end = tod_steering_end;
- vdso_data->tb_update_count++;
+ vdso_data->arch_data.tod_steering_end = tod_steering_end;
+
/* Update LPAR offset. */
if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
lpar_offset = qto.tod_epoch_difference;
@@ -492,7 +445,6 @@ static struct stp_sstpi stp_info;
static void *stp_page;
static void stp_work_fn(struct work_struct *work);
-static DEFINE_MUTEX(stp_work_mutex);
static DECLARE_WORK(stp_work, stp_work_fn);
static struct timer_list stp_timer;
@@ -583,10 +535,26 @@ void stp_queue_work(void)
queue_work(time_sync_wq, &stp_work);
}
+static int __store_stpinfo(void)
+{
+ int rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi));
+
+ if (rc)
+ clear_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags);
+ else
+ set_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags);
+ return rc;
+}
+
+static int stpinfo_valid(void)
+{
+ return stp_online && test_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags);
+}
+
static int stp_sync_clock(void *data)
{
struct clock_sync_data *sync = data;
- unsigned long long clock_delta;
+ unsigned long long clock_delta, flags;
static int first;
int rc;
@@ -599,16 +567,17 @@ static int stp_sync_clock(void *data)
if (stp_info.todoff[0] || stp_info.todoff[1] ||
stp_info.todoff[2] || stp_info.todoff[3] ||
stp_info.tmd != 2) {
+ flags = vdso_update_begin();
rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0,
&clock_delta);
if (rc == 0) {
sync->clock_delta = clock_delta;
clock_sync_global(clock_delta);
- rc = chsc_sstpi(stp_page, &stp_info,
- sizeof(struct stp_sstpi));
+ rc = __store_stpinfo();
if (rc == 0 && stp_info.tmd != 2)
rc = -EAGAIN;
}
+ vdso_update_end(flags);
}
sync->in_sync = rc ? -EAGAIN : 1;
xchg(&first, 0);
@@ -628,6 +597,81 @@ static int stp_sync_clock(void *data)
return 0;
}
+static int stp_clear_leap(void)
+{
+ struct __kernel_timex txc;
+ int ret;
+
+ memset(&txc, 0, sizeof(txc));
+
+ ret = do_adjtimex(&txc);
+ if (ret < 0)
+ return ret;
+
+ txc.modes = ADJ_STATUS;
+ txc.status &= ~(STA_INS|STA_DEL);
+ return do_adjtimex(&txc);
+}
+
+static void stp_check_leap(void)
+{
+ struct stp_stzi stzi;
+ struct stp_lsoib *lsoib = &stzi.lsoib;
+ struct __kernel_timex txc;
+ int64_t timediff;
+ int leapdiff, ret;
+
+ if (!stp_info.lu || !check_sync_clock()) {
+ /*
+ * Either a scheduled leap second was removed by the operator,
+ * or STP is out of sync. In both cases, clear the leap second
+ * kernel flags.
+ */
+ if (stp_clear_leap() < 0)
+ pr_err("failed to clear leap second flags\n");
+ return;
+ }
+
+ if (chsc_stzi(stp_page, &stzi, sizeof(stzi))) {
+ pr_err("stzi failed\n");
+ return;
+ }
+
+ timediff = tod_to_ns(lsoib->nlsout - get_tod_clock()) / NSEC_PER_SEC;
+ leapdiff = lsoib->nlso - lsoib->also;
+
+ if (leapdiff != 1 && leapdiff != -1) {
+ pr_err("Cannot schedule %d leap seconds\n", leapdiff);
+ return;
+ }
+
+ if (timediff < 0) {
+ if (stp_clear_leap() < 0)
+ pr_err("failed to clear leap second flags\n");
+ } else if (timediff < 7200) {
+ memset(&txc, 0, sizeof(txc));
+ ret = do_adjtimex(&txc);
+ if (ret < 0)
+ return;
+
+ txc.modes = ADJ_STATUS;
+ if (leapdiff > 0)
+ txc.status |= STA_INS;
+ else
+ txc.status |= STA_DEL;
+ ret = do_adjtimex(&txc);
+ if (ret < 0)
+ pr_err("failed to set leap second flags\n");
+ /* arm Timer to clear leap second flags */
+ mod_timer(&stp_timer, jiffies + msecs_to_jiffies(14400 * MSEC_PER_SEC));
+ } else {
+ /* The day the leap second is scheduled for hasn't been reached. Retry
+ * in one hour.
+ */
+ mod_timer(&stp_timer, jiffies + msecs_to_jiffies(3600 * MSEC_PER_SEC));
+ }
+}
+
/*
* STP work. Check for the STP state and take over the clock
* synchronization if the STP clock source is usable.
@@ -638,7 +682,7 @@ static void stp_work_fn(struct work_struct *work)
int rc;
/* prevent multiple execution. */
- mutex_lock(&stp_work_mutex);
+ mutex_lock(&stp_mutex);
if (!stp_online) {
chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL);
@@ -646,23 +690,22 @@ static void stp_work_fn(struct work_struct *work)
goto out_unlock;
}
- rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0, NULL);
+ rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xf0e0, NULL);
if (rc)
goto out_unlock;
- rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi));
+ rc = __store_stpinfo();
if (rc || stp_info.c == 0)
goto out_unlock;
/* Skip synchronization if the clock is already in sync. */
- if (check_sync_clock())
- goto out_unlock;
-
- memset(&stp_sync, 0, sizeof(stp_sync));
- cpus_read_lock();
- atomic_set(&stp_sync.cpus, num_online_cpus() - 1);
- stop_machine_cpuslocked(stp_sync_clock, &stp_sync, cpu_online_mask);
- cpus_read_unlock();
+ if (!check_sync_clock()) {
+ memset(&stp_sync, 0, sizeof(stp_sync));
+ cpus_read_lock();
+ atomic_set(&stp_sync.cpus, num_online_cpus() - 1);
+ stop_machine_cpuslocked(stp_sync_clock, &stp_sync, cpu_online_mask);
+ cpus_read_unlock();
+ }
if (!check_sync_clock())
/*
@@ -670,9 +713,11 @@ static void stp_work_fn(struct work_struct *work)
* Retry after a second.
*/
mod_timer(&stp_timer, jiffies + msecs_to_jiffies(MSEC_PER_SEC));
+ else if (stp_info.lu)
+ stp_check_leap();
out_unlock:
- mutex_unlock(&stp_work_mutex);
+ mutex_unlock(&stp_mutex);
}
/*
@@ -687,10 +732,14 @@ static ssize_t ctn_id_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- if (!stp_online)
- return -ENODATA;
- return sprintf(buf, "%016llx\n",
- *(unsigned long long *) stp_info.ctnid);
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_mutex);
+ if (stpinfo_valid())
+ ret = sprintf(buf, "%016llx\n",
+ *(unsigned long long *) stp_info.ctnid);
+ mutex_unlock(&stp_mutex);
+ return ret;
}
static DEVICE_ATTR_RO(ctn_id);
@@ -699,9 +748,13 @@ static ssize_t ctn_type_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- if (!stp_online)
- return -ENODATA;
- return sprintf(buf, "%i\n", stp_info.ctn);
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_mutex);
+ if (stpinfo_valid())
+ ret = sprintf(buf, "%i\n", stp_info.ctn);
+ mutex_unlock(&stp_mutex);
+ return ret;
}
static DEVICE_ATTR_RO(ctn_type);
@@ -710,9 +763,13 @@ static ssize_t dst_offset_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- if (!stp_online || !(stp_info.vbits & 0x2000))
- return -ENODATA;
- return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto);
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_mutex);
+ if (stpinfo_valid() && (stp_info.vbits & 0x2000))
+ ret = sprintf(buf, "%i\n", (int)(s16) stp_info.dsto);
+ mutex_unlock(&stp_mutex);
+ return ret;
}
static DEVICE_ATTR_RO(dst_offset);
@@ -721,20 +778,56 @@ static ssize_t leap_seconds_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- if (!stp_online || !(stp_info.vbits & 0x8000))
- return -ENODATA;
- return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps);
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_mutex);
+ if (stpinfo_valid() && (stp_info.vbits & 0x8000))
+ ret = sprintf(buf, "%i\n", (int)(s16) stp_info.leaps);
+ mutex_unlock(&stp_mutex);
+ return ret;
}
static DEVICE_ATTR_RO(leap_seconds);
+static ssize_t leap_seconds_scheduled_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct stp_stzi stzi;
+ ssize_t ret;
+
+ mutex_lock(&stp_mutex);
+ if (!stpinfo_valid() || !(stp_info.vbits & 0x8000) || !stp_info.lu) {
+ mutex_unlock(&stp_mutex);
+ return -ENODATA;
+ }
+
+ ret = chsc_stzi(stp_page, &stzi, sizeof(stzi));
+ mutex_unlock(&stp_mutex);
+ if (ret < 0)
+ return ret;
+
+ if (!stzi.lsoib.p)
+ return sprintf(buf, "0,0\n");
+
+ return sprintf(buf, "%llu,%d\n",
+ tod_to_ns(stzi.lsoib.nlsout - TOD_UNIX_EPOCH) / NSEC_PER_SEC,
+ stzi.lsoib.nlso - stzi.lsoib.also);
+}
+
+static DEVICE_ATTR_RO(leap_seconds_scheduled);
+
static ssize_t stratum_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- if (!stp_online)
- return -ENODATA;
- return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum);
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_mutex);
+ if (stpinfo_valid())
+ ret = sprintf(buf, "%i\n", (int)(s16) stp_info.stratum);
+ mutex_unlock(&stp_mutex);
+ return ret;
}
static DEVICE_ATTR_RO(stratum);
@@ -743,9 +836,13 @@ static ssize_t time_offset_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- if (!stp_online || !(stp_info.vbits & 0x0800))
- return -ENODATA;
- return sprintf(buf, "%i\n", (int) stp_info.tto);
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_mutex);
+ if (stpinfo_valid() && (stp_info.vbits & 0x0800))
+ ret = sprintf(buf, "%i\n", (int) stp_info.tto);
+ mutex_unlock(&stp_mutex);
+ return ret;
}
static DEVICE_ATTR_RO(time_offset);
@@ -754,9 +851,13 @@ static ssize_t time_zone_offset_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- if (!stp_online || !(stp_info.vbits & 0x4000))
- return -ENODATA;
- return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo);
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_mutex);
+ if (stpinfo_valid() && (stp_info.vbits & 0x4000))
+ ret = sprintf(buf, "%i\n", (int)(s16) stp_info.tzo);
+ mutex_unlock(&stp_mutex);
+ return ret;
}
static DEVICE_ATTR_RO(time_zone_offset);
@@ -765,9 +866,13 @@ static ssize_t timing_mode_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- if (!stp_online)
- return -ENODATA;
- return sprintf(buf, "%i\n", stp_info.tmd);
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_mutex);
+ if (stpinfo_valid())
+ ret = sprintf(buf, "%i\n", stp_info.tmd);
+ mutex_unlock(&stp_mutex);
+ return ret;
}
static DEVICE_ATTR_RO(timing_mode);
@@ -776,9 +881,13 @@ static ssize_t timing_state_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- if (!stp_online)
- return -ENODATA;
- return sprintf(buf, "%i\n", stp_info.tst);
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_mutex);
+ if (stpinfo_valid())
+ ret = sprintf(buf, "%i\n", stp_info.tst);
+ mutex_unlock(&stp_mutex);
+ return ret;
}
static DEVICE_ATTR_RO(timing_state);
@@ -801,14 +910,14 @@ static ssize_t online_store(struct device *dev,
return -EINVAL;
if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
return -EOPNOTSUPP;
- mutex_lock(&clock_sync_mutex);
+ mutex_lock(&stp_mutex);
stp_online = value;
if (stp_online)
set_bit(CLOCK_SYNC_STP, &clock_sync_flags);
else
clear_bit(CLOCK_SYNC_STP, &clock_sync_flags);
queue_work(time_sync_wq, &stp_work);
- mutex_unlock(&clock_sync_mutex);
+ mutex_unlock(&stp_mutex);
return count;
}
@@ -824,6 +933,7 @@ static struct device_attribute *stp_attributes[] = {
&dev_attr_dst_offset,
&dev_attr_leap_seconds,
&dev_attr_online,
+ &dev_attr_leap_seconds_scheduled,
&dev_attr_stratum,
&dev_attr_time_offset,
&dev_attr_time_zone_offset,
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index c296e5c8dbf9..14bd9d58edc9 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -26,33 +26,10 @@ int __bootdata_preserved(prot_virt_guest);
struct uv_info __bootdata_preserved(uv_info);
#if IS_ENABLED(CONFIG_KVM)
-int prot_virt_host;
+int __bootdata_preserved(prot_virt_host);
EXPORT_SYMBOL(prot_virt_host);
EXPORT_SYMBOL(uv_info);
-static int __init prot_virt_setup(char *val)
-{
- bool enabled;
- int rc;
-
- rc = kstrtobool(val, &enabled);
- if (!rc && enabled)
- prot_virt_host = 1;
-
- if (is_prot_virt_guest() && prot_virt_host) {
- prot_virt_host = 0;
- pr_warn("Protected virtualization not available in protected guests.");
- }
-
- if (prot_virt_host && !test_facility(158)) {
- prot_virt_host = 0;
- pr_warn("Protected virtualization not supported by the hardware.");
- }
-
- return rc;
-}
-early_param("prot_virt", prot_virt_setup);
-
static int __init uv_init(unsigned long stor_base, unsigned long stor_len)
{
struct uv_cb_init uvcb = {
@@ -74,6 +51,24 @@ void __init setup_uv(void)
{
unsigned long uv_stor_base;
+ /*
+ * keep these conditions in line with kasan init code has_uv_sec_stor_limit()
+ */
+ if (!is_prot_virt_host())
+ return;
+
+ if (is_prot_virt_guest()) {
+ prot_virt_host = 0;
+ pr_warn("Protected virtualization not available in protected guests.");
+ return;
+ }
+
+ if (!test_facility(158)) {
+ prot_virt_host = 0;
+ pr_warn("Protected virtualization not supported by the hardware.");
+ return;
+ }
+
uv_stor_base = (unsigned long)memblock_alloc_try_nid(
uv_info.uv_base_stor_len, SZ_1M, SZ_2G,
MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE);
@@ -98,7 +93,8 @@ fail:
void adjust_to_uv_max(unsigned long *vmax)
{
- *vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr);
+ if (uv_info.max_sec_stor_addr)
+ *vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr);
}
/*
@@ -119,6 +115,26 @@ static int uv_pin_shared(unsigned long paddr)
}
/*
+ * Requests the Ultravisor to destroy a guest page and make it
+ * accessible to the host. The destroy clears the page instead of
+ * exporting.
+ *
+ * @paddr: Absolute host address of page to be destroyed
+ */
+int uv_destroy_page(unsigned long paddr)
+{
+ struct uv_cb_cfs uvcb = {
+ .header.cmd = UVC_CMD_DESTR_SEC_STOR,
+ .header.len = sizeof(uvcb),
+ .paddr = paddr
+ };
+
+ if (uv_call(0, (u64)&uvcb))
+ return -EINVAL;
+ return 0;
+}
+
+/*
* Requests the Ultravisor to encrypt a guest page and make it
* accessible to the host for paging (export).
*
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index c4baefaa6e34..f9da5b149141 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -20,6 +20,8 @@
#include <linux/security.h>
#include <linux/memblock.h>
#include <linux/compat.h>
+#include <linux/binfmts.h>
+#include <vdso/datapage.h>
#include <asm/asm-offsets.h>
#include <asm/processor.h>
#include <asm/mmu.h>
@@ -96,35 +98,12 @@ static union {
struct vdso_data data;
u8 page[PAGE_SIZE];
} vdso_data_store __page_aligned_data;
-struct vdso_data *vdso_data = &vdso_data_store.data;
-
-/*
- * Setup vdso data page.
- */
-static void __init vdso_init_data(struct vdso_data *vd)
-{
- vd->ectg_available = test_facility(31);
-}
-
+struct vdso_data *vdso_data = (struct vdso_data *)&vdso_data_store.data;
/*
* Allocate/free per cpu vdso data.
*/
#define SEGMENT_ORDER 2
-/*
- * The initial vdso_data structure for the boot CPU. Eventually
- * it is replaced with a properly allocated structure in vdso_init.
- * This is necessary because a valid S390_lowcore.vdso_per_cpu_data
- * pointer is required to be able to return from an interrupt or
- * program check. See the exit paths in entry.S.
- */
-struct vdso_data boot_vdso_data __initdata;
-
-void __init vdso_alloc_boot_cpu(struct lowcore *lowcore)
-{
- lowcore->vdso_per_cpu_data = (unsigned long) &boot_vdso_data;
-}
-
int vdso_alloc_per_cpu(struct lowcore *lowcore)
{
unsigned long segment_table, page_table, page_frame;
@@ -246,8 +225,6 @@ static int __init vdso_init(void)
{
int i;
- vdso_init_data(vdso_data);
-
/* Calculate the size of the 64 bit vDSO */
vdso64_pages = ((&vdso64_end - &vdso64_start
+ PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index 4a66a1cb919b..3d3303283181 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -1,17 +1,23 @@
# SPDX-License-Identifier: GPL-2.0
-# List of files in the vdso, has to be asm only for now
+# List of files in the vdso
KCOV_INSTRUMENT := n
+ARCH_REL_TYPE_ABS := R_390_COPY|R_390_GLOB_DAT|R_390_JMP_SLOT|R_390_RELATIVE
+ARCH_REL_TYPE_ABS += R_390_GOT|R_390_PLT
-obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o
+include $(srctree)/lib/vdso/Makefile
+obj-vdso64 = vdso_user_wrapper.o note.o getcpu.o
+obj-cvdso64 = vdso64_generic.o
+CFLAGS_REMOVE_vdso64_generic.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE)
# Build rules
-targets := $(obj-vdso64) vdso64.so vdso64.so.dbg
+targets := $(obj-vdso64) $(obj-cvdso64) vdso64.so vdso64.so.dbg
obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
+obj-cvdso64 := $(addprefix $(obj)/, $(obj-cvdso64))
KBUILD_AFLAGS += -DBUILD_VDSO
-KBUILD_CFLAGS += -DBUILD_VDSO
+KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING
KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS))
KBUILD_AFLAGS_64 += -m64 -s
@@ -37,7 +43,7 @@ KASAN_SANITIZE := n
$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
# link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) FORCE
+$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj-cvdso64) FORCE
$(call if_changed,ld)
# strip rule for the .so file
@@ -49,9 +55,14 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
$(obj-vdso64): %.o: %.S FORCE
$(call if_changed_dep,vdso64as)
+$(obj-cvdso64): %.o: %.c FORCE
+ $(call if_changed_dep,vdso64cc)
+
# actual build commands
quiet_cmd_vdso64as = VDSO64A $@
cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
+quiet_cmd_vdso64cc = VDSO64C $@
+ cmd_vdso64cc = $(CC) $(c_flags) -c -o $@ $<
# install commands for the unstripped file
quiet_cmd_vdso_install = INSTALL $@
diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S
deleted file mode 100644
index 0c79caa32b59..000000000000
--- a/arch/s390/kernel/vdso64/clock_getres.S
+++ /dev/null
@@ -1,50 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of clock_getres() for 64 bits processes in a
- * s390 kernel for use in the vDSO
- *
- * Copyright IBM Corp. 2008
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-
- .text
- .align 4
- .globl __kernel_clock_getres
- .type __kernel_clock_getres,@function
-__kernel_clock_getres:
- CFI_STARTPROC
- larl %r1,3f
- lg %r0,0(%r1)
- cghi %r2,__CLOCK_REALTIME_COARSE
- je 0f
- cghi %r2,__CLOCK_MONOTONIC_COARSE
- je 0f
- larl %r1,_vdso_data
- llgf %r0,__VDSO_CLOCK_REALTIME_RES(%r1)
- cghi %r2,__CLOCK_REALTIME
- je 0f
- cghi %r2,__CLOCK_MONOTONIC
- je 0f
- cghi %r2,__CLOCK_THREAD_CPUTIME_ID
- je 0f
- cghi %r2,-2 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */
- jne 2f
- larl %r5,_vdso_data
- icm %r0,15,__LC_ECTG_OK(%r5)
- jz 2f
-0: ltgr %r3,%r3
- jz 1f /* res == NULL */
- xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */
- stg %r0,8(%r3) /* store tp->tv_usec */
-1: lghi %r2,0
- br %r14
-2: lghi %r1,__NR_clock_getres /* fallback to svc */
- svc 0
- br %r14
- CFI_ENDPROC
-3: .quad __CLOCK_COARSE_RES
- .size __kernel_clock_getres,.-__kernel_clock_getres
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
deleted file mode 100644
index 9d2ee79b90f2..000000000000
--- a/arch/s390/kernel/vdso64/clock_gettime.S
+++ /dev/null
@@ -1,163 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of clock_gettime() for 64 bits processes in a
- * s390 kernel for use in the vDSO
- *
- * Copyright IBM Corp. 2008
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-#include <asm/ptrace.h>
-
- .text
- .align 4
- .globl __kernel_clock_gettime
- .type __kernel_clock_gettime,@function
-__kernel_clock_gettime:
- CFI_STARTPROC
- aghi %r15,-16
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
- CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
- larl %r5,_vdso_data
- cghi %r2,__CLOCK_REALTIME_COARSE
- je 4f
- cghi %r2,__CLOCK_REALTIME
- je 5f
- cghi %r2,-3 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */
- je 9f
- cghi %r2,__CLOCK_MONOTONIC_COARSE
- je 3f
- cghi %r2,__CLOCK_MONOTONIC
- jne 12f
-
- /* CLOCK_MONOTONIC */
-0: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
- tmll %r4,0x0001 /* pending update ? loop */
- jnz 0b
- stcke 0(%r15) /* Store TOD clock */
- lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
- lg %r0,__VDSO_WTOM_SEC(%r5)
- lg %r1,1(%r15)
- sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
- msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */
- alg %r1,__VDSO_WTOM_NSEC(%r5)
- srlg %r1,%r1,0(%r2) /* >> tk->shift */
- clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
- jne 0b
- larl %r5,13f
-1: clg %r1,0(%r5)
- jl 2f
- slg %r1,0(%r5)
- aghi %r0,1
- j 1b
-2: stg %r0,0(%r3) /* store tp->tv_sec */
- stg %r1,8(%r3) /* store tp->tv_nsec */
- lghi %r2,0
- aghi %r15,16
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
- CFI_RESTORE 15
- br %r14
-
- /* CLOCK_MONOTONIC_COARSE */
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
- CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-3: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
- tmll %r4,0x0001 /* pending update ? loop */
- jnz 3b
- lg %r0,__VDSO_WTOM_CRS_SEC(%r5)
- lg %r1,__VDSO_WTOM_CRS_NSEC(%r5)
- clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
- jne 3b
- j 2b
-
- /* CLOCK_REALTIME_COARSE */
-4: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
- tmll %r4,0x0001 /* pending update ? loop */
- jnz 4b
- lg %r0,__VDSO_XTIME_CRS_SEC(%r5)
- lg %r1,__VDSO_XTIME_CRS_NSEC(%r5)
- clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
- jne 4b
- j 7f
-
- /* CLOCK_REALTIME */
-5: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
- tmll %r4,0x0001 /* pending update ? loop */
- jnz 5b
- stcke 0(%r15) /* Store TOD clock */
- lg %r1,1(%r15)
- lg %r0,__VDSO_TS_END(%r5) /* TOD steering end time */
- slgr %r0,%r1 /* now - ts_steering_end */
- ltgr %r0,%r0 /* past end of steering ? */
- jm 17f
- srlg %r0,%r0,15 /* 1 per 2^16 */
- tm __VDSO_TS_DIR+3(%r5),0x01 /* steering direction? */
- jz 18f
- lcgr %r0,%r0 /* negative TOD offset */
-18: algr %r1,%r0 /* add steering offset */
-17: lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
- sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
- msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */
- alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */
- srlg %r1,%r1,0(%r2) /* >> tk->shift */
- lg %r0,__VDSO_XTIME_SEC(%r5) /* tk->xtime_sec */
- clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
- jne 5b
- larl %r5,13f
-6: clg %r1,0(%r5)
- jl 7f
- slg %r1,0(%r5)
- aghi %r0,1
- j 6b
-7: stg %r0,0(%r3) /* store tp->tv_sec */
- stg %r1,8(%r3) /* store tp->tv_nsec */
- lghi %r2,0
- aghi %r15,16
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
- CFI_RESTORE 15
- br %r14
-
- /* CPUCLOCK_VIRT for this thread */
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
- CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-9: lghi %r4,0
- icm %r0,15,__VDSO_ECTG_OK(%r5)
- jz 12f
- sacf 256 /* Magic ectg instruction */
- .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4
- sacf 0
- algr %r1,%r0 /* r1 = cputime as TOD value */
- mghi %r1,1000 /* convert to nanoseconds */
- srlg %r1,%r1,12 /* r1 = cputime in nanosec */
- lgr %r4,%r1
- larl %r5,13f
- srlg %r1,%r1,9 /* divide by 1000000000 */
- mlg %r0,8(%r5)
- srlg %r0,%r0,11 /* r0 = tv_sec */
- stg %r0,0(%r3)
- msg %r0,0(%r5) /* calculate tv_nsec */
- slgr %r4,%r0 /* r4 = tv_nsec */
- stg %r4,8(%r3)
- lghi %r2,0
- aghi %r15,16
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
- CFI_RESTORE 15
- br %r14
-
- /* Fallback to system call */
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
- CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-12: lghi %r1,__NR_clock_gettime
- svc 0
- aghi %r15,16
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
- CFI_RESTORE 15
- br %r14
- CFI_ENDPROC
-
-13: .quad 1000000000
-14: .quad 19342813113834067
- .size __kernel_clock_gettime,.-__kernel_clock_gettime
diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S
deleted file mode 100644
index aebe10dc7c99..000000000000
--- a/arch/s390/kernel/vdso64/gettimeofday.S
+++ /dev/null
@@ -1,71 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of gettimeofday() for 64 bits processes in a
- * s390 kernel for use in the vDSO
- *
- * Copyright IBM Corp. 2008
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-#include <asm/ptrace.h>
-
- .text
- .align 4
- .globl __kernel_gettimeofday
- .type __kernel_gettimeofday,@function
-__kernel_gettimeofday:
- CFI_STARTPROC
- aghi %r15,-16
- CFI_ADJUST_CFA_OFFSET 16
- CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
- larl %r5,_vdso_data
-0: ltgr %r3,%r3 /* check if tz is NULL */
- je 1f
- mvc 0(8,%r3),__VDSO_TIMEZONE(%r5)
-1: ltgr %r2,%r2 /* check if tv is NULL */
- je 4f
- lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
- tmll %r4,0x0001 /* pending update ? loop */
- jnz 0b
- stcke 0(%r15) /* Store TOD clock */
- lg %r1,1(%r15)
- lg %r0,__VDSO_TS_END(%r5) /* TOD steering end time */
- slgr %r0,%r1 /* now - ts_steering_end */
- ltgr %r0,%r0 /* past end of steering ? */
- jm 6f
- srlg %r0,%r0,15 /* 1 per 2^16 */
- tm __VDSO_TS_DIR+3(%r5),0x01 /* steering direction? */
- jz 7f
- lcgr %r0,%r0 /* negative TOD offset */
-7: algr %r1,%r0 /* add steering offset */
-6: sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
- msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */
- alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */
- lg %r0,__VDSO_XTIME_SEC(%r5) /* tk->xtime_sec */
- clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
- jne 0b
- lgf %r5,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
- srlg %r1,%r1,0(%r5) /* >> tk->shift */
- larl %r5,5f
-2: clg %r1,0(%r5)
- jl 3f
- slg %r1,0(%r5)
- aghi %r0,1
- j 2b
-3: stg %r0,0(%r2) /* store tv->tv_sec */
- slgr %r0,%r0 /* tv_nsec -> tv_usec */
- ml %r0,8(%r5)
- srlg %r0,%r0,6
- stg %r0,8(%r2) /* store tv->tv_usec */
-4: lghi %r2,0
- aghi %r15,16
- CFI_ADJUST_CFA_OFFSET -16
- CFI_RESTORE 15
- br %r14
- CFI_ENDPROC
-5: .quad 1000000000
- .long 274877907
- .size __kernel_gettimeofday,.-__kernel_gettimeofday
diff --git a/arch/s390/kernel/vdso64/vdso64_generic.c b/arch/s390/kernel/vdso64/vdso64_generic.c
new file mode 100644
index 000000000000..a8cef7e4d137
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vdso64_generic.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../../../../lib/vdso/gettimeofday.c"
+
+int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv,
+ struct timezone *tz)
+{
+ return __cvdso_gettimeofday(tv, tz);
+}
+
+int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
+{
+ return __cvdso_clock_gettime(clock, ts);
+}
+
+int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts)
+{
+ return __cvdso_clock_getres(clock, ts);
+}
diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
new file mode 100644
index 000000000000..a775d7e52872
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/vdso.h>
+#include <asm/unistd.h>
+#include <asm/asm-offsets.h>
+#include <asm/dwarf.h>
+#include <asm/ptrace.h>
+
+#define WRAPPER_FRAME_SIZE (STACK_FRAME_OVERHEAD+8)
+
+/*
+ * Older glibc version called vdso without allocating a stackframe. This wrapper
+ * is just used to allocate a stackframe. See
+ * https://sourceware.org/git/?p=glibc.git;a=commit;h=478593e6374f3818da39332260dc453cb19cfa1e
+ * for details.
+ */
+.macro vdso_func func
+ .globl __kernel_\func
+ .type __kernel_\func,@function
+ .align 8
+__kernel_\func:
+ CFI_STARTPROC
+ aghi %r15,-WRAPPER_FRAME_SIZE
+ CFI_DEF_CFA_OFFSET (STACK_FRAME_OVERHEAD + WRAPPER_FRAME_SIZE)
+ CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
+ stg %r14,STACK_FRAME_OVERHEAD(%r15)
+ brasl %r14,__s390_vdso_\func
+ lg %r14,STACK_FRAME_OVERHEAD(%r15)
+ aghi %r15,WRAPPER_FRAME_SIZE
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
+ CFI_RESTORE 15
+ br %r14
+ CFI_ENDPROC
+ .size __kernel_\func,.-__kernel_\func
+.endm
+
+vdso_func gettimeofday
+vdso_func clock_getres
+vdso_func clock_gettime
diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c
index 0e30e6e43b0c..93b3209b94a2 100644
--- a/arch/s390/lib/string.c
+++ b/arch/s390/lib/string.c
@@ -333,7 +333,7 @@ EXPORT_SYMBOL(memchr);
* memcmp - Compare two areas of memory
* @s1: One area of memory
* @s2: Another area of memory
- * @count: The size of the area.
+ * @n: The size of the area.
*/
#ifdef __HAVE_ARCH_MEMCMP
int memcmp(const void *s1, const void *s2, size_t n)
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 3175413186b9..cd67e94c16aa 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -8,7 +8,7 @@ obj-y += page-states.o pageattr.o pgtable.o pgalloc.o
obj-$(CONFIG_CMM) += cmm.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
-obj-$(CONFIG_S390_PTDUMP) += dump_pagetables.o
+obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o
obj-$(CONFIG_PGSTE) += gmap.o
KASAN_SANITIZE_kasan_init.o := n
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index c2ac9b8ae612..8f9ff7e7187d 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -1,9 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/set_memory.h>
+#include <linux/ptdump.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
-#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/kasan.h>
+#include <asm/ptdump.h>
#include <asm/kasan.h>
#include <asm/sections.h>
@@ -15,264 +17,235 @@ struct addr_marker {
};
enum address_markers_idx {
- IDENTITY_NR = 0,
+ IDENTITY_BEFORE_NR = 0,
+ IDENTITY_BEFORE_END_NR,
KERNEL_START_NR,
KERNEL_END_NR,
+ IDENTITY_AFTER_NR,
+ IDENTITY_AFTER_END_NR,
#ifdef CONFIG_KASAN
KASAN_SHADOW_START_NR,
KASAN_SHADOW_END_NR,
#endif
VMEMMAP_NR,
+ VMEMMAP_END_NR,
VMALLOC_NR,
+ VMALLOC_END_NR,
MODULES_NR,
+ MODULES_END_NR,
};
static struct addr_marker address_markers[] = {
- [IDENTITY_NR] = {0, "Identity Mapping"},
+ [IDENTITY_BEFORE_NR] = {0, "Identity Mapping Start"},
+ [IDENTITY_BEFORE_END_NR] = {(unsigned long)_stext, "Identity Mapping End"},
[KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"},
[KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"},
+ [IDENTITY_AFTER_NR] = {(unsigned long)_end, "Identity Mapping Start"},
+ [IDENTITY_AFTER_END_NR] = {0, "Identity Mapping End"},
#ifdef CONFIG_KASAN
[KASAN_SHADOW_START_NR] = {KASAN_SHADOW_START, "Kasan Shadow Start"},
[KASAN_SHADOW_END_NR] = {KASAN_SHADOW_END, "Kasan Shadow End"},
#endif
- [VMEMMAP_NR] = {0, "vmemmap Area"},
- [VMALLOC_NR] = {0, "vmalloc Area"},
- [MODULES_NR] = {0, "Modules Area"},
+ [VMEMMAP_NR] = {0, "vmemmap Area Start"},
+ [VMEMMAP_END_NR] = {0, "vmemmap Area End"},
+ [VMALLOC_NR] = {0, "vmalloc Area Start"},
+ [VMALLOC_END_NR] = {0, "vmalloc Area End"},
+ [MODULES_NR] = {0, "Modules Area Start"},
+ [MODULES_END_NR] = {0, "Modules Area End"},
{ -1, NULL }
};
struct pg_state {
+ struct ptdump_state ptdump;
+ struct seq_file *seq;
int level;
unsigned int current_prot;
+ bool check_wx;
+ unsigned long wx_pages;
unsigned long start_address;
- unsigned long current_address;
const struct addr_marker *marker;
};
+#define pt_dump_seq_printf(m, fmt, args...) \
+({ \
+ struct seq_file *__m = (m); \
+ \
+ if (__m) \
+ seq_printf(__m, fmt, ##args); \
+})
+
+#define pt_dump_seq_puts(m, fmt) \
+({ \
+ struct seq_file *__m = (m); \
+ \
+ if (__m) \
+ seq_printf(__m, fmt); \
+})
+
static void print_prot(struct seq_file *m, unsigned int pr, int level)
{
static const char * const level_name[] =
{ "ASCE", "PGD", "PUD", "PMD", "PTE" };
- seq_printf(m, "%s ", level_name[level]);
+ pt_dump_seq_printf(m, "%s ", level_name[level]);
if (pr & _PAGE_INVALID) {
- seq_printf(m, "I\n");
+ pt_dump_seq_printf(m, "I\n");
return;
}
- seq_puts(m, (pr & _PAGE_PROTECT) ? "RO " : "RW ");
- seq_puts(m, (pr & _PAGE_NOEXEC) ? "NX\n" : "X\n");
+ pt_dump_seq_puts(m, (pr & _PAGE_PROTECT) ? "RO " : "RW ");
+ pt_dump_seq_puts(m, (pr & _PAGE_NOEXEC) ? "NX\n" : "X\n");
}
-static void note_page(struct seq_file *m, struct pg_state *st,
- unsigned int new_prot, int level)
+static void note_prot_wx(struct pg_state *st, unsigned long addr)
+{
+#ifdef CONFIG_DEBUG_WX
+ if (!st->check_wx)
+ return;
+ if (st->current_prot & _PAGE_INVALID)
+ return;
+ if (st->current_prot & _PAGE_PROTECT)
+ return;
+ if (st->current_prot & _PAGE_NOEXEC)
+ return;
+ /* The first lowcore page is currently still W+X. */
+ if (addr == PAGE_SIZE)
+ return;
+ WARN_ONCE(1, "s390/mm: Found insecure W+X mapping at address %pS\n",
+ (void *)st->start_address);
+ st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
+#endif /* CONFIG_DEBUG_WX */
+}
+
+static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val)
{
- static const char units[] = "KMGTPE";
int width = sizeof(unsigned long) * 2;
+ static const char units[] = "KMGTPE";
const char *unit = units;
- unsigned int prot, cur;
unsigned long delta;
+ struct pg_state *st;
+ struct seq_file *m;
+ unsigned int prot;
- /*
- * If we have a "break" in the series, we need to flush the state
- * that we have now. "break" is either changing perms, levels or
- * address space marker.
- */
- prot = new_prot;
- cur = st->current_prot;
-
- if (!st->level) {
- /* First entry */
- st->current_prot = new_prot;
+ st = container_of(pt_st, struct pg_state, ptdump);
+ m = st->seq;
+ prot = val & (_PAGE_PROTECT | _PAGE_NOEXEC);
+ if (level == 4 && (val & _PAGE_INVALID))
+ prot = _PAGE_INVALID;
+ /* For pmd_none() & friends val gets passed as zero. */
+ if (level != 4 && !val)
+ prot = _PAGE_INVALID;
+ /* Final flush from generic code. */
+ if (level == -1)
+ addr = max_addr;
+ if (st->level == -1) {
+ pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name);
+ st->start_address = addr;
+ st->current_prot = prot;
st->level = level;
- st->marker = address_markers;
- seq_printf(m, "---[ %s ]---\n", st->marker->name);
- } else if (prot != cur || level != st->level ||
- st->current_address >= st->marker[1].start_address) {
- /* Print the actual finished series */
- seq_printf(m, "0x%0*lx-0x%0*lx ",
- width, st->start_address,
- width, st->current_address);
- delta = (st->current_address - st->start_address) >> 10;
+ } else if (prot != st->current_prot || level != st->level ||
+ addr >= st->marker[1].start_address) {
+ note_prot_wx(st, addr);
+ pt_dump_seq_printf(m, "0x%0*lx-0x%0*lx ",
+ width, st->start_address,
+ width, addr);
+ delta = (addr - st->start_address) >> 10;
while (!(delta & 0x3ff) && unit[1]) {
delta >>= 10;
unit++;
}
- seq_printf(m, "%9lu%c ", delta, *unit);
+ pt_dump_seq_printf(m, "%9lu%c ", delta, *unit);
print_prot(m, st->current_prot, st->level);
- while (st->current_address >= st->marker[1].start_address) {
+ while (addr >= st->marker[1].start_address) {
st->marker++;
- seq_printf(m, "---[ %s ]---\n", st->marker->name);
+ pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name);
}
- st->start_address = st->current_address;
- st->current_prot = new_prot;
+ st->start_address = addr;
+ st->current_prot = prot;
st->level = level;
}
}
-#ifdef CONFIG_KASAN
-static void note_kasan_early_shadow_page(struct seq_file *m,
- struct pg_state *st)
-{
- unsigned int prot;
-
- prot = pte_val(*kasan_early_shadow_pte) &
- (_PAGE_PROTECT | _PAGE_INVALID | _PAGE_NOEXEC);
- note_page(m, st, prot, 4);
-}
-#endif
-
-/*
- * The actual page table walker functions. In order to keep the
- * implementation of print_prot() short, we only check and pass
- * _PAGE_INVALID and _PAGE_PROTECT flags to note_page() if a region,
- * segment or page table entry is invalid or read-only.
- * After all it's just a hint that the current level being walked
- * contains an invalid or read-only entry.
- */
-static void walk_pte_level(struct seq_file *m, struct pg_state *st,
- pmd_t *pmd, unsigned long addr)
-{
- unsigned int prot;
- pte_t *pte;
- int i;
-
- for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) {
- st->current_address = addr;
- pte = pte_offset_kernel(pmd, addr);
- prot = pte_val(*pte) &
- (_PAGE_PROTECT | _PAGE_INVALID | _PAGE_NOEXEC);
- note_page(m, st, prot, 4);
- addr += PAGE_SIZE;
- }
-}
-
-static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
- pud_t *pud, unsigned long addr)
-{
- unsigned int prot;
- pmd_t *pmd;
- int i;
-
-#ifdef CONFIG_KASAN
- if ((pud_val(*pud) & PAGE_MASK) == __pa(kasan_early_shadow_pmd)) {
- note_kasan_early_shadow_page(m, st);
- return;
- }
-#endif
-
- pmd = pmd_offset(pud, addr);
- for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++, pmd++) {
- st->current_address = addr;
- if (!pmd_none(*pmd)) {
- if (pmd_large(*pmd)) {
- prot = pmd_val(*pmd) &
- (_SEGMENT_ENTRY_PROTECT |
- _SEGMENT_ENTRY_NOEXEC);
- note_page(m, st, prot, 3);
- } else
- walk_pte_level(m, st, pmd, addr);
- } else
- note_page(m, st, _PAGE_INVALID, 3);
- addr += PMD_SIZE;
- }
-}
-
-static void walk_pud_level(struct seq_file *m, struct pg_state *st,
- p4d_t *p4d, unsigned long addr)
+#ifdef CONFIG_DEBUG_WX
+void ptdump_check_wx(void)
{
- unsigned int prot;
- pud_t *pud;
- int i;
+ struct pg_state st = {
+ .ptdump = {
+ .note_page = note_page,
+ .range = (struct ptdump_range[]) {
+ {.start = 0, .end = max_addr},
+ {.start = 0, .end = 0},
+ }
+ },
+ .seq = NULL,
+ .level = -1,
+ .current_prot = 0,
+ .check_wx = true,
+ .wx_pages = 0,
+ .start_address = 0,
+ .marker = (struct addr_marker[]) {
+ { .start_address = 0, .name = NULL},
+ { .start_address = -1, .name = NULL},
+ },
+ };
-#ifdef CONFIG_KASAN
- if ((p4d_val(*p4d) & PAGE_MASK) == __pa(kasan_early_shadow_pud)) {
- note_kasan_early_shadow_page(m, st);
+ if (!MACHINE_HAS_NX)
return;
- }
-#endif
-
- pud = pud_offset(p4d, addr);
- for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++, pud++) {
- st->current_address = addr;
- if (!pud_none(*pud))
- if (pud_large(*pud)) {
- prot = pud_val(*pud) &
- (_REGION_ENTRY_PROTECT |
- _REGION_ENTRY_NOEXEC);
- note_page(m, st, prot, 2);
- } else
- walk_pmd_level(m, st, pud, addr);
- else
- note_page(m, st, _PAGE_INVALID, 2);
- addr += PUD_SIZE;
- }
+ ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
+ if (st.wx_pages)
+ pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n", st.wx_pages);
+ else
+ pr_info("Checked W+X mappings: passed, no unexpected W+X pages found\n");
}
+#endif /* CONFIG_DEBUG_WX */
-static void walk_p4d_level(struct seq_file *m, struct pg_state *st,
- pgd_t *pgd, unsigned long addr)
+#ifdef CONFIG_PTDUMP_DEBUGFS
+static int ptdump_show(struct seq_file *m, void *v)
{
- p4d_t *p4d;
- int i;
-
-#ifdef CONFIG_KASAN
- if ((pgd_val(*pgd) & PAGE_MASK) == __pa(kasan_early_shadow_p4d)) {
- note_kasan_early_shadow_page(m, st);
- return;
- }
-#endif
+ struct pg_state st = {
+ .ptdump = {
+ .note_page = note_page,
+ .range = (struct ptdump_range[]) {
+ {.start = 0, .end = max_addr},
+ {.start = 0, .end = 0},
+ }
+ },
+ .seq = m,
+ .level = -1,
+ .current_prot = 0,
+ .check_wx = false,
+ .wx_pages = 0,
+ .start_address = 0,
+ .marker = address_markers,
+ };
- p4d = p4d_offset(pgd, addr);
- for (i = 0; i < PTRS_PER_P4D && addr < max_addr; i++, p4d++) {
- st->current_address = addr;
- if (!p4d_none(*p4d))
- walk_pud_level(m, st, p4d, addr);
- else
- note_page(m, st, _PAGE_INVALID, 2);
- addr += P4D_SIZE;
- }
+ get_online_mems();
+ mutex_lock(&cpa_mutex);
+ ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
+ mutex_unlock(&cpa_mutex);
+ put_online_mems();
+ return 0;
}
+DEFINE_SHOW_ATTRIBUTE(ptdump);
+#endif /* CONFIG_PTDUMP_DEBUGFS */
-static void walk_pgd_level(struct seq_file *m)
+/*
+ * Heapsort from lib/sort.c is not a stable sorting algorithm, do a simple
+ * insertion sort to preserve the original order of markers with the same
+ * start address.
+ */
+static void sort_address_markers(void)
{
- unsigned long addr = 0;
- struct pg_state st;
- pgd_t *pgd;
- int i;
+ struct addr_marker tmp;
+ int i, j;
- memset(&st, 0, sizeof(st));
- for (i = 0; i < PTRS_PER_PGD && addr < max_addr; i++) {
- st.current_address = addr;
- pgd = pgd_offset_k(addr);
- if (!pgd_none(*pgd))
- walk_p4d_level(m, &st, pgd, addr);
- else
- note_page(m, &st, _PAGE_INVALID, 1);
- addr += PGDIR_SIZE;
- cond_resched();
+ for (i = 1; i < ARRAY_SIZE(address_markers) - 1; i++) {
+ tmp = address_markers[i];
+ for (j = i - 1; j >= 0 && address_markers[j].start_address > tmp.start_address; j--)
+ address_markers[j + 1] = address_markers[j];
+ address_markers[j + 1] = tmp;
}
- /* Flush out the last page */
- st.current_address = max_addr;
- note_page(m, &st, 0, 0);
}
-static int ptdump_show(struct seq_file *m, void *v)
-{
- walk_pgd_level(m);
- return 0;
-}
-
-static int ptdump_open(struct inode *inode, struct file *filp)
-{
- return single_open(filp, ptdump_show, NULL);
-}
-
-static const struct file_operations ptdump_fops = {
- .open = ptdump_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int pt_dump_init(void)
{
/*
@@ -282,10 +255,17 @@ static int pt_dump_init(void)
*/
max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
max_addr = 1UL << (max_addr * 11 + 31);
+ address_markers[IDENTITY_AFTER_END_NR].start_address = memory_end;
address_markers[MODULES_NR].start_address = MODULES_VADDR;
+ address_markers[MODULES_END_NR].start_address = MODULES_END;
address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
+ address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size;
address_markers[VMALLOC_NR].start_address = VMALLOC_START;
+ address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
+ sort_address_markers();
+#ifdef CONFIG_PTDUMP_DEBUGFS
debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
+#endif /* CONFIG_PTDUMP_DEBUGFS */
return 0;
}
device_initcall(pt_dump_init);
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 373542ca1113..cfb0017f33a7 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2679,7 +2679,7 @@ static int __s390_reset_acc(pte_t *ptep, unsigned long addr,
pte_t pte = READ_ONCE(*ptep);
if (pte_present(pte))
- WARN_ON_ONCE(uv_convert_from_secure(pte_val(pte) & PAGE_MASK));
+ WARN_ON_ONCE(uv_destroy_page(pte_val(pte) & PAGE_MASK));
return 0;
}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 0d282081dc1f..d3ddb4361361 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -34,6 +34,7 @@
#include <asm/processor.h>
#include <linux/uaccess.h>
#include <asm/pgalloc.h>
+#include <asm/ptdump.h>
#include <asm/dma.h>
#include <asm/lowcore.h>
#include <asm/tlb.h>
@@ -129,6 +130,7 @@ void mark_rodata_ro(void)
set_memory_ro((unsigned long)__start_ro_after_init, size >> PAGE_SHIFT);
pr_info("Write protected read-only-after-init data: %luk\n", size >> 10);
+ debug_checkwx();
}
int set_memory_encrypted(unsigned long addr, int numpages)
diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c
index 99dd1c63a065..5646b39c728a 100644
--- a/arch/s390/mm/kasan_init.c
+++ b/arch/s390/mm/kasan_init.c
@@ -11,7 +11,9 @@
#include <asm/facility.h>
#include <asm/sections.h>
#include <asm/setup.h>
+#include <asm/uv.h>
+unsigned long kasan_vmax;
static unsigned long segment_pos __initdata;
static unsigned long segment_low __initdata;
static unsigned long pgalloc_pos __initdata;
@@ -99,8 +101,12 @@ static void __init kasan_early_vmemmap_populate(unsigned long address,
pgt_prot_zero = pgprot_val(PAGE_KERNEL_RO);
if (!has_nx)
pgt_prot_zero &= ~_PAGE_NOEXEC;
- pgt_prot = pgprot_val(PAGE_KERNEL_EXEC);
- sgt_prot = pgprot_val(SEGMENT_KERNEL_EXEC);
+ pgt_prot = pgprot_val(PAGE_KERNEL);
+ sgt_prot = pgprot_val(SEGMENT_KERNEL);
+ if (!has_nx || mode == POPULATE_ONE2ONE) {
+ pgt_prot &= ~_PAGE_NOEXEC;
+ sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
+ }
while (address < end) {
pg_dir = pgd_offset_k(address);
@@ -252,14 +258,31 @@ static void __init kasan_early_detect_facilities(void)
}
}
+static bool __init has_uv_sec_stor_limit(void)
+{
+ /*
+ * keep these conditions in line with setup_uv()
+ */
+ if (!is_prot_virt_host())
+ return false;
+
+ if (is_prot_virt_guest())
+ return false;
+
+ if (!test_facility(158))
+ return false;
+
+ return !!uv_info.max_sec_stor_addr;
+}
+
void __init kasan_early_init(void)
{
unsigned long untracked_mem_end;
unsigned long shadow_alloc_size;
+ unsigned long vmax_unlimited;
unsigned long initrd_end;
unsigned long asce_type;
unsigned long memsize;
- unsigned long vmax;
unsigned long pgt_prot = pgprot_val(PAGE_KERNEL_RO);
pte_t pte_z;
pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
@@ -287,7 +310,9 @@ void __init kasan_early_init(void)
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE));
crst_table_init((unsigned long *)early_pg_dir,
_REGION2_ENTRY_EMPTY);
- untracked_mem_end = vmax = _REGION1_SIZE;
+ untracked_mem_end = kasan_vmax = vmax_unlimited = _REGION1_SIZE;
+ if (has_uv_sec_stor_limit())
+ kasan_vmax = min(vmax_unlimited, uv_info.max_sec_stor_addr);
asce_type = _ASCE_TYPE_REGION2;
} else {
/* 3 level paging */
@@ -295,7 +320,7 @@ void __init kasan_early_init(void)
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE));
crst_table_init((unsigned long *)early_pg_dir,
_REGION3_ENTRY_EMPTY);
- untracked_mem_end = vmax = _REGION2_SIZE;
+ untracked_mem_end = kasan_vmax = vmax_unlimited = _REGION2_SIZE;
asce_type = _ASCE_TYPE_REGION3;
}
@@ -365,17 +390,20 @@ void __init kasan_early_init(void)
/* populate kasan shadow (for identity mapping and zero page mapping) */
kasan_early_vmemmap_populate(__sha(0), __sha(memsize), POPULATE_MAP);
if (IS_ENABLED(CONFIG_MODULES))
- untracked_mem_end = vmax - MODULES_LEN;
+ untracked_mem_end = kasan_vmax - MODULES_LEN;
if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
- untracked_mem_end = vmax - vmalloc_size - MODULES_LEN;
+ untracked_mem_end = kasan_vmax - vmalloc_size - MODULES_LEN;
/* shallowly populate kasan shadow for vmalloc and modules */
kasan_early_vmemmap_populate(__sha(untracked_mem_end),
- __sha(vmax), POPULATE_SHALLOW);
+ __sha(kasan_vmax), POPULATE_SHALLOW);
}
/* populate kasan shadow for untracked memory */
kasan_early_vmemmap_populate(__sha(max_physmem_end),
__sha(untracked_mem_end),
POPULATE_ZERO_SHADOW);
+ kasan_early_vmemmap_populate(__sha(kasan_vmax),
+ __sha(vmax_unlimited),
+ POPULATE_ZERO_SHADOW);
/* memory allocated for identity mapping structs will be freed later */
pgalloc_freeable = pgalloc_pos;
/* populate identity mapping */
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index c5c52ec2b46f..ed8e5b3575d5 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -278,7 +278,7 @@ static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end,
return rc;
}
-static DEFINE_MUTEX(cpa_mutex);
+DEFINE_MUTEX(cpa_mutex);
static int change_page_attr(unsigned long addr, unsigned long end,
unsigned long flags)
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 0d25f743b270..18205f851c24 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -24,6 +24,26 @@
#include <asm/mmu_context.h>
#include <asm/page-states.h>
+pgprot_t pgprot_writecombine(pgprot_t prot)
+{
+ /*
+ * mio_wb_bit_mask may be set on a different CPU, but it is only set
+ * once at init and only read afterwards.
+ */
+ return __pgprot(pgprot_val(prot) | mio_wb_bit_mask);
+}
+EXPORT_SYMBOL_GPL(pgprot_writecombine);
+
+pgprot_t pgprot_writethrough(pgprot_t prot)
+{
+ /*
+ * mio_wb_bit_mask may be set on a different CPU, but it is only set
+ * once at init and only read afterwards.
+ */
+ return __pgprot(pgprot_val(prot) & ~mio_wb_bit_mask);
+}
+EXPORT_SYMBOL_GPL(pgprot_writethrough);
+
static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, int nodat)
{
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index b4e3c84772a1..bf557a1b789c 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -6,3 +6,4 @@
obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \
pci_event.o pci_debug.o pci_insn.o pci_mmio.o \
pci_bus.o
+obj-$(CONFIG_PCI_IOV) += pci_iov.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 1804230dd8d8..570016ae8bcd 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -37,6 +37,7 @@
#include <asm/pci_dma.h>
#include "pci_bus.h"
+#include "pci_iov.h"
/* list of all detected zpci devices */
static LIST_HEAD(zpci_list);
@@ -226,7 +227,7 @@ void __iowrite64_copy(void __iomem *to, const void *from, size_t count)
zpci_memcpy_toio(to, from, count);
}
-void __iomem *ioremap(phys_addr_t addr, size_t size)
+static void __iomem *__ioremap(phys_addr_t addr, size_t size, pgprot_t prot)
{
unsigned long offset, vaddr;
struct vm_struct *area;
@@ -247,14 +248,37 @@ void __iomem *ioremap(phys_addr_t addr, size_t size)
return NULL;
vaddr = (unsigned long) area->addr;
- if (ioremap_page_range(vaddr, vaddr + size, addr, PAGE_KERNEL)) {
+ if (ioremap_page_range(vaddr, vaddr + size, addr, prot)) {
free_vm_area(area);
return NULL;
}
return (void __iomem *) ((unsigned long) area->addr + offset);
}
+
+void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot)
+{
+ return __ioremap(addr, size, __pgprot(prot));
+}
+EXPORT_SYMBOL(ioremap_prot);
+
+void __iomem *ioremap(phys_addr_t addr, size_t size)
+{
+ return __ioremap(addr, size, PAGE_KERNEL);
+}
EXPORT_SYMBOL(ioremap);
+void __iomem *ioremap_wc(phys_addr_t addr, size_t size)
+{
+ return __ioremap(addr, size, pgprot_writecombine(PAGE_KERNEL));
+}
+EXPORT_SYMBOL(ioremap_wc);
+
+void __iomem *ioremap_wt(phys_addr_t addr, size_t size)
+{
+ return __ioremap(addr, size, pgprot_writethrough(PAGE_KERNEL));
+}
+EXPORT_SYMBOL(ioremap_wt);
+
void iounmap(volatile void __iomem *addr)
{
if (static_branch_likely(&have_mio))
@@ -390,15 +414,6 @@ static struct pci_ops pci_root_ops = {
.write = pci_write,
};
-#ifdef CONFIG_PCI_IOV
-static struct resource iov_res = {
- .name = "PCI IOV res",
- .start = 0,
- .end = -1,
- .flags = IORESOURCE_MEM,
-};
-#endif
-
static void zpci_map_resources(struct pci_dev *pdev)
{
struct zpci_dev *zdev = to_zpci(pdev);
@@ -419,16 +434,7 @@ static void zpci_map_resources(struct pci_dev *pdev)
pdev->resource[i].end = pdev->resource[i].start + len - 1;
}
-#ifdef CONFIG_PCI_IOV
- for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
- int bar = i + PCI_IOV_RESOURCES;
-
- len = pci_resource_len(pdev, bar);
- if (!len)
- continue;
- pdev->resource[bar].parent = &iov_res;
- }
-#endif
+ zpci_iov_map_resources(pdev);
}
static void zpci_unmap_resources(struct pci_dev *pdev)
@@ -684,7 +690,7 @@ void zpci_remove_device(struct zpci_dev *zdev)
pdev = pci_get_slot(zbus->bus, zdev->devfn);
if (pdev) {
if (pdev->is_virtfn)
- return zpci_remove_virtfn(pdev, zdev->vfn);
+ return zpci_iov_remove_virtfn(pdev, zdev->vfn);
pci_stop_and_remove_bus_device_locked(pdev);
}
}
@@ -788,6 +794,9 @@ static int zpci_mem_init(void)
if (!zpci_iomap_bitmap)
goto error_iomap_bitmap;
+ if (static_branch_likely(&have_mio))
+ clp_setup_writeback_mio();
+
return 0;
error_iomap_bitmap:
kfree(zpci_iomap_start);
@@ -885,9 +894,3 @@ out:
return rc;
}
subsys_initcall_sync(pci_base_init);
-
-void zpci_rescan(void)
-{
- if (zpci_is_enabled())
- clp_rescan_pci_devices_simple(NULL);
-}
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index 5967f3014156..0c0db7c3a404 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -24,6 +24,7 @@
#include <asm/pci_dma.h>
#include "pci_bus.h"
+#include "pci_iov.h"
static LIST_HEAD(zbus_list);
static DEFINE_SPINLOCK(zbus_list_lock);
@@ -126,69 +127,6 @@ static struct zpci_bus *zpci_bus_alloc(int pchid)
return zbus;
}
-#ifdef CONFIG_PCI_IOV
-static int zpci_bus_link_virtfn(struct pci_dev *pdev,
- struct pci_dev *virtfn, int vfid)
-{
- int rc;
-
- rc = pci_iov_sysfs_link(pdev, virtfn, vfid);
- if (rc)
- return rc;
-
- virtfn->is_virtfn = 1;
- virtfn->multifunction = 0;
- virtfn->physfn = pci_dev_get(pdev);
-
- return 0;
-}
-
-static int zpci_bus_setup_virtfn(struct zpci_bus *zbus,
- struct pci_dev *virtfn, int vfn)
-{
- int i, cand_devfn;
- struct zpci_dev *zdev;
- struct pci_dev *pdev;
- int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/
- int rc = 0;
-
- if (!zbus->multifunction)
- return 0;
-
- /* If the parent PF for the given VF is also configured in the
- * instance, it must be on the same zbus.
- * We can then identify the parent PF by checking what
- * devfn the VF would have if it belonged to that PF using the PF's
- * stride and offset. Only if this candidate devfn matches the
- * actual devfn will we link both functions.
- */
- for (i = 0; i < ZPCI_FUNCTIONS_PER_BUS; i++) {
- zdev = zbus->function[i];
- if (zdev && zdev->is_physfn) {
- pdev = pci_get_slot(zbus->bus, zdev->devfn);
- if (!pdev)
- continue;
- cand_devfn = pci_iov_virtfn_devfn(pdev, vfid);
- if (cand_devfn == virtfn->devfn) {
- rc = zpci_bus_link_virtfn(pdev, virtfn, vfid);
- /* balance pci_get_slot() */
- pci_dev_put(pdev);
- break;
- }
- /* balance pci_get_slot() */
- pci_dev_put(pdev);
- }
- }
- return rc;
-}
-#else
-static inline int zpci_bus_setup_virtfn(struct zpci_bus *zbus,
- struct pci_dev *virtfn, int vfn)
-{
- return 0;
-}
-#endif
-
void pcibios_bus_add_device(struct pci_dev *pdev)
{
struct zpci_dev *zdev = to_zpci(pdev);
@@ -198,7 +136,7 @@ void pcibios_bus_add_device(struct pci_dev *pdev)
* perform PF/VF linking.
*/
if (zdev->vfn)
- zpci_bus_setup_virtfn(zdev->zbus, pdev, zdev->vfn);
+ zpci_iov_setup_virtfn(zdev->zbus, pdev, zdev->vfn);
}
diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h
index 4972433df458..f8dfac0b5b71 100644
--- a/arch/s390/pci/pci_bus.h
+++ b/arch/s390/pci/pci_bus.h
@@ -9,7 +9,6 @@
int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops);
void zpci_bus_device_unregister(struct zpci_dev *zdev);
-int zpci_bus_init(void);
void zpci_release_device(struct kref *kref);
static inline void zpci_zdev_put(struct zpci_dev *zdev)
@@ -30,15 +29,3 @@ static inline struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus,
return (devfn >= ZPCI_FUNCTIONS_PER_BUS) ? NULL : zbus->function[devfn];
}
-#ifdef CONFIG_PCI_IOV
-static inline void zpci_remove_virtfn(struct pci_dev *pdev, int vfn)
-{
-
- pci_lock_rescan_remove();
- /* Linux' vfid's start at 0 vfn at 1 */
- pci_iov_remove_virtfn(pdev->physfn, vfn - 1);
- pci_unlock_rescan_remove();
-}
-#else /* CONFIG_PCI_IOV */
-static inline void zpci_remove_virtfn(struct pci_dev *pdev, int vfn) {}
-#endif /* CONFIG_PCI_IOV */
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 7e735f41a0a6..5a34a1359dc5 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -244,6 +244,7 @@ error:
return rc;
}
+static int clp_refresh_fh(u32 fid);
/*
* Enable/Disable a given PCI function and update its function handle if
* necessary
@@ -286,7 +287,41 @@ static int clp_set_pci_fn(struct zpci_dev *zdev, u8 nr_dma_as, u8 command)
} else if (!rc && rrb->response.hdr.rsp == CLP_RC_SETPCIFN_ALRDY &&
rrb->response.fh == 0) {
/* Function is already in desired state - update handle */
- rc = clp_rescan_pci_devices_simple(&fid);
+ rc = clp_refresh_fh(fid);
+ }
+ clp_free_block(rrb);
+ return rc;
+}
+
+int clp_setup_writeback_mio(void)
+{
+ struct clp_req_rsp_slpc_pci *rrb;
+ u8 wb_bit_pos;
+ int rc;
+
+ rrb = clp_alloc_block(GFP_KERNEL);
+ if (!rrb)
+ return -ENOMEM;
+
+ memset(rrb, 0, sizeof(*rrb));
+ rrb->request.hdr.len = sizeof(rrb->request);
+ rrb->request.hdr.cmd = CLP_SLPC;
+ rrb->response.hdr.len = sizeof(rrb->response);
+
+ rc = clp_req(rrb, CLP_LPS_PCI);
+ if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
+ if (rrb->response.vwb) {
+ wb_bit_pos = rrb->response.mio_wb;
+ set_bit_inv(wb_bit_pos, &mio_wb_bit_mask);
+ zpci_dbg(3, "wb bit: %d\n", wb_bit_pos);
+ } else {
+ zpci_dbg(3, "wb bit: n.a.\n");
+ }
+
+ } else {
+ zpci_err("SLPC PCI:\n");
+ zpci_err_clp(rrb->response.hdr.rsp, rc);
+ rc = -EIO;
}
clp_free_block(rrb);
return rc;
@@ -374,24 +409,6 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data)
clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
}
-static void __clp_update(struct clp_fh_list_entry *entry, void *data)
-{
- struct zpci_dev *zdev;
- u32 *fid = data;
-
- if (!entry->vendor_id)
- return;
-
- if (fid && *fid != entry->fid)
- return;
-
- zdev = get_zdev_by_fid(entry->fid);
- if (!zdev)
- return;
-
- zdev->fh = entry->fh;
-}
-
int clp_scan_pci_devices(void)
{
struct clp_req_rsp_list_pci *rrb;
@@ -407,27 +424,25 @@ int clp_scan_pci_devices(void)
return rc;
}
-int clp_rescan_pci_devices(void)
+static void __clp_refresh_fh(struct clp_fh_list_entry *entry, void *data)
{
- struct clp_req_rsp_list_pci *rrb;
- int rc;
-
- zpci_remove_reserved_devices();
+ struct zpci_dev *zdev;
+ u32 fid = *((u32 *)data);
- rrb = clp_alloc_block(GFP_KERNEL);
- if (!rrb)
- return -ENOMEM;
+ if (!entry->vendor_id || fid != entry->fid)
+ return;
- rc = clp_list_pci(rrb, NULL, __clp_add);
+ zdev = get_zdev_by_fid(fid);
+ if (!zdev)
+ return;
- clp_free_block(rrb);
- return rc;
+ zdev->fh = entry->fh;
}
-/* Rescan PCI functions and refresh function handles. If fid is non-NULL only
- * refresh the handle of the function matching @fid
+/*
+ * Refresh the function handle of the function matching @fid
*/
-int clp_rescan_pci_devices_simple(u32 *fid)
+static int clp_refresh_fh(u32 fid)
{
struct clp_req_rsp_list_pci *rrb;
int rc;
@@ -436,7 +451,7 @@ int clp_rescan_pci_devices_simple(u32 *fid)
if (!rrb)
return -ENOMEM;
- rc = clp_list_pci(rrb, fid, __clp_update);
+ rc = clp_list_pci(rrb, &fid, __clp_refresh_fh);
clp_free_block(rrb);
return rc;
@@ -495,7 +510,7 @@ static int clp_base_command(struct clp_req *req, struct clp_req_hdr *lpcb)
}
}
-static int clp_pci_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb)
+static int clp_pci_slpc(struct clp_req *req, struct clp_req_rsp_slpc_pci *lpcb)
{
unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index d9ae7456dd4c..d33f21545dfd 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -152,7 +152,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
}
break;
case 0x0306: /* 0x308 or 0x302 for multiple devices */
- clp_rescan_pci_devices();
+ zpci_remove_reserved_devices();
+ clp_scan_pci_devices();
break;
case 0x0308: /* Standby -> Reserved */
if (!zdev)
diff --git a/arch/s390/pci/pci_iov.c b/arch/s390/pci/pci_iov.c
new file mode 100644
index 000000000000..ead062bf2b41
--- /dev/null
+++ b/arch/s390/pci/pci_iov.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s):
+ * Niklas Schnelle <schnelle@linux.ibm.com>
+ *
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+
+#include "pci_iov.h"
+
+static struct resource iov_res = {
+ .name = "PCI IOV res",
+ .start = 0,
+ .end = -1,
+ .flags = IORESOURCE_MEM,
+};
+
+void zpci_iov_map_resources(struct pci_dev *pdev)
+{
+ resource_size_t len;
+ int i;
+
+ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+ int bar = i + PCI_IOV_RESOURCES;
+
+ len = pci_resource_len(pdev, bar);
+ if (!len)
+ continue;
+ pdev->resource[bar].parent = &iov_res;
+ }
+}
+
+void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn)
+{
+ pci_lock_rescan_remove();
+ /* Linux' vfid's start at 0 vfn at 1 */
+ pci_iov_remove_virtfn(pdev->physfn, vfn - 1);
+ pci_unlock_rescan_remove();
+}
+
+static int zpci_iov_link_virtfn(struct pci_dev *pdev, struct pci_dev *virtfn, int vfid)
+{
+ int rc;
+
+ rc = pci_iov_sysfs_link(pdev, virtfn, vfid);
+ if (rc)
+ return rc;
+
+ virtfn->is_virtfn = 1;
+ virtfn->multifunction = 0;
+ virtfn->physfn = pci_dev_get(pdev);
+
+ return 0;
+}
+
+int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn)
+{
+ int i, cand_devfn;
+ struct zpci_dev *zdev;
+ struct pci_dev *pdev;
+ int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/
+ int rc = 0;
+
+ if (!zbus->multifunction)
+ return 0;
+
+ /* If the parent PF for the given VF is also configured in the
+ * instance, it must be on the same zbus.
+ * We can then identify the parent PF by checking what
+ * devfn the VF would have if it belonged to that PF using the PF's
+ * stride and offset. Only if this candidate devfn matches the
+ * actual devfn will we link both functions.
+ */
+ for (i = 0; i < ZPCI_FUNCTIONS_PER_BUS; i++) {
+ zdev = zbus->function[i];
+ if (zdev && zdev->is_physfn) {
+ pdev = pci_get_slot(zbus->bus, zdev->devfn);
+ if (!pdev)
+ continue;
+ cand_devfn = pci_iov_virtfn_devfn(pdev, vfid);
+ if (cand_devfn == virtfn->devfn) {
+ rc = zpci_iov_link_virtfn(pdev, virtfn, vfid);
+ /* balance pci_get_slot() */
+ pci_dev_put(pdev);
+ break;
+ }
+ /* balance pci_get_slot() */
+ pci_dev_put(pdev);
+ }
+ }
+ return rc;
+}
diff --git a/arch/s390/pci/pci_iov.h b/arch/s390/pci/pci_iov.h
new file mode 100644
index 000000000000..b2c828003bad
--- /dev/null
+++ b/arch/s390/pci/pci_iov.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s):
+ * Niklas Schnelle <schnelle@linux.ibm.com>
+ *
+ */
+
+#ifndef __S390_PCI_IOV_H
+#define __S390_PCI_IOV_H
+
+#ifdef CONFIG_PCI_IOV
+void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn);
+
+void zpci_iov_map_resources(struct pci_dev *pdev);
+
+int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn);
+
+#else /* CONFIG_PCI_IOV */
+static inline void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn) {}
+
+static inline void zpci_iov_map_resources(struct pci_dev *pdev) {}
+
+static inline int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn)
+{
+ return 0;
+}
+#endif /* CONFIG_PCI_IOV */
+#endif /* __S390_PCI_IOV_h */
diff --git a/arch/s390/scripts/Makefile.chkbss b/arch/s390/scripts/Makefile.chkbss
deleted file mode 100644
index f4f4c2c6dee9..000000000000
--- a/arch/s390/scripts/Makefile.chkbss
+++ /dev/null
@@ -1,20 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-chkbss-target ?= built-in.a
-$(obj)/$(chkbss-target): chkbss
-
-chkbss-files := $(addsuffix .chkbss, $(chkbss))
-clean-files += $(chkbss-files)
-
-PHONY += chkbss
-chkbss: $(addprefix $(obj)/, $(chkbss-files))
-
-quiet_cmd_chkbss = CHKBSS $<
- cmd_chkbss = \
- if ! $(OBJSIZE) --common $< | $(AWK) 'END { if ($$3) exit 1 }'; then \
- echo "error: $< .bss section is not empty" >&2; exit 1; \
- fi; \
- touch $@;
-
-$(obj)/%.o.chkbss: $(obj)/%.o
- $(call cmd,chkbss)
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index ae0a00beea5f..783738448ff5 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -442,3 +442,4 @@
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
+440 common process_madvise sys_process_madvise
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index e286e2badc8a..e38d8bf454e8 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1039,38 +1039,9 @@ void smp_fetch_global_pmu(void)
* are flush_tlb_*() routines, and these run after flush_cache_*()
* which performs the flushw.
*
- * The SMP TLB coherency scheme we use works as follows:
- *
- * 1) mm->cpu_vm_mask is a bit mask of which cpus an address
- * space has (potentially) executed on, this is the heuristic
- * we use to avoid doing cross calls.
- *
- * Also, for flushing from kswapd and also for clones, we
- * use cpu_vm_mask as the list of cpus to make run the TLB.
- *
- * 2) TLB context numbers are shared globally across all processors
- * in the system, this allows us to play several games to avoid
- * cross calls.
- *
- * One invariant is that when a cpu switches to a process, and
- * that processes tsk->active_mm->cpu_vm_mask does not have the
- * current cpu's bit set, that tlb context is flushed locally.
- *
- * If the address space is non-shared (ie. mm->count == 1) we avoid
- * cross calls when we want to flush the currently running process's
- * tlb state. This is done by clearing all cpu bits except the current
- * processor's in current->mm->cpu_vm_mask and performing the
- * flush locally only. This will force any subsequent cpus which run
- * this task to flush the context from the local tlb if the process
- * migrates to another cpu (again).
- *
- * 3) For shared address spaces (threads) and swapping we bite the
- * bullet for most cases and perform the cross call (but only to
- * the cpus listed in cpu_vm_mask).
- *
- * The performance gain from "optimizing" away the cross call for threads is
- * questionable (in theory the big win for threads is the massive sharing of
- * address space state across processors).
+ * mm->cpu_vm_mask is a bit mask of which cpus an address
+ * space has (potentially) executed on, this is the heuristic
+ * we use to limit cross calls.
*/
/* This currently is only used by the hugetlb arch pre-fault
@@ -1080,18 +1051,13 @@ void smp_fetch_global_pmu(void)
void smp_flush_tlb_mm(struct mm_struct *mm)
{
u32 ctx = CTX_HWBITS(mm->context);
- int cpu = get_cpu();
- if (atomic_read(&mm->mm_users) == 1) {
- cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
- goto local_flush_and_out;
- }
+ get_cpu();
smp_cross_call_masked(&xcall_flush_tlb_mm,
ctx, 0, 0,
mm_cpumask(mm));
-local_flush_and_out:
__flush_tlb_mm(ctx, SECONDARY_CONTEXT);
put_cpu();
@@ -1114,17 +1080,15 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long
{
u32 ctx = CTX_HWBITS(mm->context);
struct tlb_pending_info info;
- int cpu = get_cpu();
+
+ get_cpu();
info.ctx = ctx;
info.nr = nr;
info.vaddrs = vaddrs;
- if (mm == current->mm && atomic_read(&mm->mm_users) == 1)
- cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
- else
- smp_call_function_many(mm_cpumask(mm), tlb_pending_func,
- &info, 1);
+ smp_call_function_many(mm_cpumask(mm), tlb_pending_func,
+ &info, 1);
__flush_tlb_pending(ctx, nr, vaddrs);
@@ -1134,14 +1098,13 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long
void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr)
{
unsigned long context = CTX_HWBITS(mm->context);
- int cpu = get_cpu();
- if (mm == current->mm && atomic_read(&mm->mm_users) == 1)
- cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
- else
- smp_cross_call_masked(&xcall_flush_tlb_page,
- context, vaddr, 0,
- mm_cpumask(mm));
+ get_cpu();
+
+ smp_cross_call_masked(&xcall_flush_tlb_page,
+ context, vaddr, 0,
+ mm_cpumask(mm));
+
__flush_tlb_page(context, vaddr);
put_cpu();
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index 37ec52b34c73..78160260991b 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -485,3 +485,4 @@
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
+440 common process_madvise sys_process_madvise
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index d49f471b02e3..16187211d059 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -62,12 +62,12 @@ config NR_CPUS
source "arch/$(HEADER_ARCH)/um/Kconfig"
-config FORBID_STATIC_LINK
- bool
+config MAY_HAVE_RUNTIME_DEPS
+ bool
config STATIC_LINK
bool "Force a static link"
- depends on !FORBID_STATIC_LINK
+ depends on CC_CAN_LINK_STATIC_NO_RUNTIME_DEPS || !MAY_HAVE_RUNTIME_DEPS
help
This option gives you the ability to force a static link of UML.
Normally, UML is linked as a shared binary. This is inconvenient for
diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
index 9160ead56e33..2e7b8e0e7194 100644
--- a/arch/um/drivers/Kconfig
+++ b/arch/um/drivers/Kconfig
@@ -234,7 +234,7 @@ config UML_NET_DAEMON
config UML_NET_VECTOR
bool "Vector I/O high performance network devices"
depends on UML_NET
- select FORBID_STATIC_LINK
+ select MAY_HAVE_RUNTIME_DEPS
help
This User-Mode Linux network driver uses multi-message send
and receive functions. The host running the UML guest must have
@@ -246,7 +246,7 @@ config UML_NET_VECTOR
config UML_NET_VDE
bool "VDE transport (obsolete)"
depends on UML_NET
- select FORBID_STATIC_LINK
+ select MAY_HAVE_RUNTIME_DEPS
help
This User-Mode Linux network transport allows one or more running
UMLs on a single host to communicate with each other and also
@@ -294,7 +294,7 @@ config UML_NET_MCAST
config UML_NET_PCAP
bool "pcap transport (obsolete)"
depends on UML_NET
- select FORBID_STATIC_LINK
+ select MAY_HAVE_RUNTIME_DEPS
help
The pcap transport makes a pcap packet stream on the host look
like an ethernet device inside UML. This is useful for making
diff --git a/arch/um/drivers/daemon_user.c b/arch/um/drivers/daemon_user.c
index 3695821d06a2..785baedc3555 100644
--- a/arch/um/drivers/daemon_user.c
+++ b/arch/um/drivers/daemon_user.c
@@ -7,6 +7,7 @@
*/
#include <stdint.h>
+#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
diff --git a/arch/um/drivers/pcap_user.c b/arch/um/drivers/pcap_user.c
index bbd20638788a..52ddda3e3b10 100644
--- a/arch/um/drivers/pcap_user.c
+++ b/arch/um/drivers/pcap_user.c
@@ -32,7 +32,7 @@ static int pcap_user_init(void *data, void *dev)
return 0;
}
-static int pcap_open(void *data)
+static int pcap_user_open(void *data)
{
struct pcap_data *pri = data;
__u32 netmask;
@@ -44,14 +44,14 @@ static int pcap_open(void *data)
if (pri->filter != NULL) {
err = dev_netmask(pri->dev, &netmask);
if (err < 0) {
- printk(UM_KERN_ERR "pcap_open : dev_netmask failed\n");
+ printk(UM_KERN_ERR "pcap_user_open : dev_netmask failed\n");
return -EIO;
}
pri->compiled = uml_kmalloc(sizeof(struct bpf_program),
UM_GFP_KERNEL);
if (pri->compiled == NULL) {
- printk(UM_KERN_ERR "pcap_open : kmalloc failed\n");
+ printk(UM_KERN_ERR "pcap_user_open : kmalloc failed\n");
return -ENOMEM;
}
@@ -59,14 +59,14 @@ static int pcap_open(void *data)
(struct bpf_program *) pri->compiled,
pri->filter, pri->optimize, netmask);
if (err < 0) {
- printk(UM_KERN_ERR "pcap_open : pcap_compile failed - "
+ printk(UM_KERN_ERR "pcap_user_open : pcap_compile failed - "
"'%s'\n", pcap_geterr(pri->pcap));
goto out;
}
err = pcap_setfilter(pri->pcap, pri->compiled);
if (err < 0) {
- printk(UM_KERN_ERR "pcap_open : pcap_setfilter "
+ printk(UM_KERN_ERR "pcap_user_open : pcap_setfilter "
"failed - '%s'\n", pcap_geterr(pri->pcap));
goto out;
}
@@ -127,7 +127,7 @@ int pcap_user_read(int fd, void *buffer, int len, struct pcap_data *pri)
const struct net_user_info pcap_user_info = {
.init = pcap_user_init,
- .open = pcap_open,
+ .open = pcap_user_open,
.close = NULL,
.remove = pcap_remove,
.add_address = NULL,
diff --git a/arch/um/drivers/slip_user.c b/arch/um/drivers/slip_user.c
index 8016d32b6809..482a19c5105c 100644
--- a/arch/um/drivers/slip_user.c
+++ b/arch/um/drivers/slip_user.c
@@ -9,7 +9,7 @@
#include <errno.h>
#include <fcntl.h>
#include <string.h>
-#include <sys/termios.h>
+#include <termios.h>
#include <sys/wait.h>
#include <net_user.h>
#include <os.h>
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index 8735c468230a..555203e3e7b4 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -1403,7 +1403,7 @@ static int vector_net_load_bpf_flash(struct net_device *dev,
kfree(vp->bpf->filter);
vp->bpf->filter = NULL;
} else {
- vp->bpf = kmalloc(sizeof(struct sock_fprog), GFP_KERNEL);
+ vp->bpf = kmalloc(sizeof(struct sock_fprog), GFP_ATOMIC);
if (vp->bpf == NULL) {
netdev_err(dev, "failed to allocate memory for firmware\n");
goto flash_fail;
@@ -1415,7 +1415,7 @@ static int vector_net_load_bpf_flash(struct net_device *dev,
if (request_firmware(&fw, efl->data, &vdevice->pdev.dev))
goto flash_fail;
- vp->bpf->filter = kmemdup(fw->data, fw->size, GFP_KERNEL);
+ vp->bpf->filter = kmemdup(fw->data, fw->size, GFP_ATOMIC);
if (!vp->bpf->filter)
goto free_buffer;
diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c
index c4a0f26b2824..bae53220ce26 100644
--- a/arch/um/drivers/vector_user.c
+++ b/arch/um/drivers/vector_user.c
@@ -18,9 +18,7 @@
#include <fcntl.h>
#include <sys/socket.h>
#include <sys/un.h>
-#include <net/ethernet.h>
#include <netinet/ip.h>
-#include <netinet/ether.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <sys/wait.h>
@@ -39,6 +37,7 @@
#define ID_MAX 2
#define TOKEN_IFNAME "ifname"
+#define TOKEN_SCRIPT "ifup"
#define TRANS_RAW "raw"
#define TRANS_RAW_LEN strlen(TRANS_RAW)
@@ -55,6 +54,9 @@
#define MAX_UN_LEN 107
+static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+static const char *template = "tapXXXXXX";
+
/* This is very ugly and brute force lookup, but it is done
* only once at initialization so not worth doing hashes or
* anything more intelligent
@@ -191,16 +193,21 @@ raw_fd_cleanup:
return err;
}
+
static struct vector_fds *user_init_tap_fds(struct arglist *ifspec)
{
- int fd = -1;
+ int fd = -1, i;
char *iface;
struct vector_fds *result = NULL;
+ bool dynamic = false;
+ char dynamic_ifname[IFNAMSIZ];
+ char *argv[] = {NULL, NULL, NULL, NULL};
iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
if (iface == NULL) {
- printk(UM_KERN_ERR "uml_tap: failed to parse interface spec\n");
- goto tap_cleanup;
+ dynamic = true;
+ iface = dynamic_ifname;
+ srand(getpid());
}
result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
@@ -214,14 +221,30 @@ static struct vector_fds *user_init_tap_fds(struct arglist *ifspec)
result->remote_addr_size = 0;
/* TAP */
+ do {
+ if (dynamic) {
+ strcpy(iface, template);
+ for (i = 0; i < strlen(iface); i++) {
+ if (iface[i] == 'X') {
+ iface[i] = padchar[rand() % strlen(padchar)];
+ }
+ }
+ }
+ fd = create_tap_fd(iface);
+ if ((fd < 0) && (!dynamic)) {
+ printk(UM_KERN_ERR "uml_tap: failed to create tun interface\n");
+ goto tap_cleanup;
+ }
+ result->tx_fd = fd;
+ result->rx_fd = fd;
+ } while (fd < 0);
- fd = create_tap_fd(iface);
- if (fd < 0) {
- printk(UM_KERN_ERR "uml_tap: failed to create tun interface\n");
- goto tap_cleanup;
+ argv[0] = uml_vector_fetch_arg(ifspec, TOKEN_SCRIPT);
+ if (argv[0]) {
+ argv[1] = iface;
+ run_helper(NULL, NULL, argv);
}
- result->tx_fd = fd;
- result->rx_fd = fd;
+
return result;
tap_cleanup:
printk(UM_KERN_ERR "user_init_tap: init failed, error %d", fd);
@@ -233,6 +256,7 @@ static struct vector_fds *user_init_hybrid_fds(struct arglist *ifspec)
{
char *iface;
struct vector_fds *result = NULL;
+ char *argv[] = {NULL, NULL, NULL, NULL};
iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
if (iface == NULL) {
@@ -266,6 +290,12 @@ static struct vector_fds *user_init_hybrid_fds(struct arglist *ifspec)
"uml_tap: failed to create paired raw socket: %i\n", result->rx_fd);
goto hybrid_cleanup;
}
+
+ argv[0] = uml_vector_fetch_arg(ifspec, TOKEN_SCRIPT);
+ if (argv[0]) {
+ argv[1] = iface;
+ run_helper(NULL, NULL, argv);
+ }
return result;
hybrid_cleanup:
printk(UM_KERN_ERR "user_init_hybrid: init failed");
@@ -332,7 +362,7 @@ static struct vector_fds *user_init_unix_fds(struct arglist *ifspec, int id)
}
switch (id) {
case ID_BESS:
- if (connect(fd, remote_addr, sizeof(struct sockaddr_un)) < 0) {
+ if (connect(fd, (const struct sockaddr *) remote_addr, sizeof(struct sockaddr_un)) < 0) {
printk(UM_KERN_ERR "bess open:cannot connect to %s %i", remote_addr->sun_path, -errno);
goto unix_cleanup;
}
@@ -399,8 +429,7 @@ static struct vector_fds *user_init_fd_fds(struct arglist *ifspec)
fd_cleanup:
if (fd >= 0)
os_close_file(fd);
- if (result != NULL)
- kfree(result);
+ kfree(result);
return NULL;
}
@@ -410,6 +439,7 @@ static struct vector_fds *user_init_raw_fds(struct arglist *ifspec)
int err = -ENOMEM;
char *iface;
struct vector_fds *result = NULL;
+ char *argv[] = {NULL, NULL, NULL, NULL};
iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
if (iface == NULL)
@@ -432,6 +462,11 @@ static struct vector_fds *user_init_raw_fds(struct arglist *ifspec)
result->remote_addr = NULL;
result->remote_addr_size = 0;
}
+ argv[0] = uml_vector_fetch_arg(ifspec, TOKEN_SCRIPT);
+ if (argv[0]) {
+ argv[1] = iface;
+ run_helper(NULL, NULL, argv);
+ }
return result;
raw_cleanup:
printk(UM_KERN_ERR "user_init_raw: init failed, error %d", err);
@@ -789,10 +824,12 @@ void *uml_vector_user_bpf(char *filename)
return false;
}
bpf_prog = uml_kmalloc(sizeof(struct sock_fprog), UM_GFP_KERNEL);
- if (bpf_prog != NULL) {
- bpf_prog->len = statbuf.st_size / sizeof(struct sock_filter);
- bpf_prog->filter = NULL;
+ if (bpf_prog == NULL) {
+ printk(KERN_ERR "Failed to allocate bpf prog buffer");
+ return NULL;
}
+ bpf_prog->len = statbuf.st_size / sizeof(struct sock_filter);
+ bpf_prog->filter = NULL;
ffd = os_open_file(filename, of_read(OPENFLAGS()), 0);
if (ffd < 0) {
printk(KERN_ERR "Error %d opening bpf file", -errno);
diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c
index 10c99e058fca..d1cffc2a7f21 100644
--- a/arch/um/kernel/sigio.c
+++ b/arch/um/kernel/sigio.c
@@ -35,14 +35,14 @@ int write_sigio_irq(int fd)
}
/* These are called from os-Linux/sigio.c to protect its pollfds arrays. */
-static DEFINE_SPINLOCK(sigio_spinlock);
+static DEFINE_MUTEX(sigio_mutex);
void sigio_lock(void)
{
- spin_lock(&sigio_spinlock);
+ mutex_lock(&sigio_mutex);
}
void sigio_unlock(void)
{
- spin_unlock(&sigio_spinlock);
+ mutex_unlock(&sigio_mutex);
}
diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c
index acbc879d2773..7452f70d50d0 100644
--- a/arch/um/kernel/sysrq.c
+++ b/arch/um/kernel/sysrq.c
@@ -47,12 +47,10 @@ void show_stack(struct task_struct *task, unsigned long *stack,
if (kstack_end(stack))
break;
if (i && ((i % STACKSLOTS_PER_LINE) == 0))
- printk("%s\n", loglvl);
+ pr_cont("\n");
pr_cont(" %08lx", *stack++);
}
- printk("%s\n", loglvl);
printk("%sCall Trace:\n", loglvl);
dump_trace(current, &stackops, (void *)loglvl);
- printk("%s\n", loglvl);
}
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 25eaa6a0c658..3d109ff3309b 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -70,13 +70,17 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg,
* read of the message and write of the ACK.
*/
if (mode != TTMH_READ) {
+ bool disabled = irqs_disabled();
+
+ BUG_ON(mode == TTMH_IDLE && !disabled);
+
+ if (disabled)
+ local_irq_enable();
while (os_poll(1, &time_travel_ext_fd) != 0) {
- if (mode == TTMH_IDLE) {
- BUG_ON(!irqs_disabled());
- local_irq_enable();
- local_irq_disable();
- }
+ /* nothing */
}
+ if (disabled)
+ local_irq_disable();
}
ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
@@ -102,6 +106,7 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg,
break;
}
+ resp.seq = msg->seq;
os_write_file(time_travel_ext_fd, &resp, sizeof(resp));
}
diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c
index 9e16078a4bf8..1d7558dac75f 100644
--- a/arch/um/os-Linux/umid.c
+++ b/arch/um/os-Linux/umid.c
@@ -97,7 +97,7 @@ static int remove_files_and_dir(char *dir)
while ((ent = readdir(directory)) != NULL) {
if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
continue;
- len = strlen(dir) + sizeof("/") + strlen(ent->d_name) + 1;
+ len = strlen(dir) + strlen("/") + strlen(ent->d_name) + 1;
if (len > sizeof(file)) {
ret = -E2BIG;
goto out;
@@ -135,7 +135,7 @@ out:
*/
static inline int is_umdir_used(char *dir)
{
- char pid[sizeof("nnnnn\0")], *end, *file;
+ char pid[sizeof("nnnnnnnnn")], *end, *file;
int dead, fd, p, n, err;
size_t filelen;
@@ -217,10 +217,10 @@ static int umdir_take_if_dead(char *dir)
static void __init create_pid_file(void)
{
- char pid[sizeof("nnnnn\0")], *file;
+ char pid[sizeof("nnnnnnnnn")], *file;
int fd, n;
- n = strlen(uml_dir) + UMID_LEN + sizeof("/pid\0");
+ n = strlen(uml_dir) + UMID_LEN + sizeof("/pid");
file = malloc(n);
if (!file)
return;
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index ecf2f390fad2..07327425d06e 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -10,7 +10,7 @@
#include <signal.h>
#include <string.h>
#include <termios.h>
-#include <wait.h>
+#include <sys/wait.h>
#include <sys/mman.h>
#include <sys/utsname.h>
#include <init.h>
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 9b6931f8d555..0d0667a9fbd7 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -444,3 +444,4 @@
437 i386 openat2 sys_openat2
438 i386 pidfd_getfd sys_pidfd_getfd
439 i386 faccessat2 sys_faccessat2
+440 i386 process_madvise sys_process_madvise
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 347809649ba2..1f47e24fb65c 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -361,6 +361,7 @@
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
+440 common process_madvise sys_process_madvise
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
index a84a141a2ad2..8443a675715b 100644
--- a/arch/x86/kvm/mmu/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
@@ -229,7 +229,8 @@ void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
return;
idx = srcu_read_lock(&head->track_srcu);
- hlist_for_each_entry_rcu(n, &head->track_notifier_list, node)
+ hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
+ srcu_read_lock_held(&head->track_srcu))
if (n->track_write)
n->track_write(vcpu, gpa, new, bytes, n);
srcu_read_unlock(&head->track_srcu, idx);
@@ -254,7 +255,8 @@ void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
return;
idx = srcu_read_lock(&head->track_srcu);
- hlist_for_each_entry_rcu(n, &head->track_notifier_list, node)
+ hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
+ srcu_read_lock_held(&head->track_srcu))
if (n->track_flush_slot)
n->track_flush_slot(kvm, slot, n);
srcu_read_unlock(&head->track_srcu, idx);
diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c
index 09a085bde0d4..1401899dee9b 100644
--- a/arch/x86/um/ptrace_64.c
+++ b/arch/x86/um/ptrace_64.c
@@ -52,14 +52,6 @@ static const int reg_offsets[] =
int putreg(struct task_struct *child, int regno, unsigned long value)
{
-#ifdef TIF_IA32
- /*
- * Some code in the 64bit emulation may not be 64bit clean.
- * Don't take any chances.
- */
- if (test_tsk_thread_flag(child, TIF_IA32))
- value &= 0xffffffff;
-#endif
switch (regno) {
case R8:
case R9:
@@ -137,10 +129,7 @@ int poke_user(struct task_struct *child, long addr, long data)
unsigned long getreg(struct task_struct *child, int regno)
{
unsigned long mask = ~0UL;
-#ifdef TIF_IA32
- if (test_tsk_thread_flag(child, TIF_IA32))
- mask = 0xffffffff;
-#endif
+
switch (regno) {
case R8:
case R9:
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c
index c51dd8363d25..bae61554abcc 100644
--- a/arch/x86/um/user-offsets.c
+++ b/arch/x86/um/user-offsets.c
@@ -2,7 +2,7 @@
#include <stdio.h>
#include <stddef.h>
#include <signal.h>
-#include <sys/poll.h>
+#include <poll.h>
#include <sys/mman.h>
#include <sys/user.h>
#define __FRAME_OFFSETS
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 4988e19598c8..1e681bf62561 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -25,6 +25,7 @@
static struct gnttab_vm_area {
struct vm_struct *area;
pte_t **ptes;
+ int idx;
} gnttab_shared_vm_area, gnttab_status_vm_area;
int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
@@ -90,19 +91,31 @@ void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
}
}
+static int gnttab_apply(pte_t *pte, unsigned long addr, void *data)
+{
+ struct gnttab_vm_area *area = data;
+
+ area->ptes[area->idx++] = pte;
+ return 0;
+}
+
static int arch_gnttab_valloc(struct gnttab_vm_area *area, unsigned nr_frames)
{
area->ptes = kmalloc_array(nr_frames, sizeof(*area->ptes), GFP_KERNEL);
if (area->ptes == NULL)
return -ENOMEM;
-
- area->area = alloc_vm_area(PAGE_SIZE * nr_frames, area->ptes);
- if (area->area == NULL) {
- kfree(area->ptes);
- return -ENOMEM;
- }
-
+ area->area = get_vm_area(PAGE_SIZE * nr_frames, VM_IOREMAP);
+ if (!area->area)
+ goto out_free_ptes;
+ if (apply_to_page_range(&init_mm, (unsigned long)area->area->addr,
+ PAGE_SIZE * nr_frames, gnttab_apply, area))
+ goto out_free_vm_area;
return 0;
+out_free_vm_area:
+ free_vm_area(area->area);
+out_free_ptes:
+ kfree(area->ptes);
+ return -ENOMEM;
}
static void arch_gnttab_vfree(struct gnttab_vm_area *area)
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 6276e3c2d3fc..b070f272995d 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -410,3 +410,4 @@
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
+440 common process_madvise sys_process_madvise
diff --git a/block/bio.c b/block/bio.c
index 561de0a30608..811b6e41f5e6 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -327,7 +327,7 @@ static void bio_chain_endio(struct bio *bio)
/**
* bio_chain - chain bio completions
* @bio: the target bio
- * @parent: the @bio's parent bio
+ * @parent: the parent bio of @bio
*
* The caller won't have a bi_end_io called when @bio completes - instead,
* @parent's bi_end_io won't be called until both @parent and @bio have
@@ -1132,7 +1132,7 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
* released.
*
* The function tries, but does not guarantee, to pin as many pages as
- * fit into the bio, or are requested in *iter, whatever is smaller. If
+ * fit into the bio, or are requested in @iter, whatever is smaller. If
* MM encounters an error pinning the requested pages, it stops. Error
* is returned only if 0 pages could be pinned.
*/
diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
index ad6e90fbc813..b02fd51e5589 100644
--- a/drivers/acpi/acpi_memhotplug.c
+++ b/drivers/acpi/acpi_memhotplug.c
@@ -194,7 +194,8 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device)
if (node < 0)
node = memory_add_physaddr_to_nid(info->start_addr);
- result = __add_memory(node, info->start_addr, info->length);
+ result = __add_memory(node, info->start_addr, info->length,
+ MHP_NONE);
/*
* If the memory block has been used by the kernel, add_memory()
diff --git a/drivers/base/core.c b/drivers/base/core.c
index b919e6d01d9a..c852f16c111b 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -4212,13 +4212,16 @@ define_dev_printk_level(_dev_info, KERN_INFO);
* -EPROBE_DEFER and propagate error upwards.
* In case of -EPROBE_DEFER it sets also defer probe reason, which can be
* checked later by reading devices_deferred debugfs attribute.
- * It replaces code sequence:
+ * It replaces code sequence::
+ *
* if (err != -EPROBE_DEFER)
* dev_err(dev, ...);
* else
* dev_dbg(dev, ...);
* return err;
- * with
+ *
+ * with::
+ *
* return dev_err_probe(dev, err, ...);
*
* Returns @err.
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index adf828dfccf0..eef4ffb6122c 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -432,7 +432,8 @@ static ssize_t probe_store(struct device *dev, struct device_attribute *attr,
nid = memory_add_physaddr_to_nid(phys_addr);
ret = __add_memory(nid, phys_addr,
- MIN_MEMORY_BLOCK_SIZE * sections_per_block);
+ MIN_MEMORY_BLOCK_SIZE * sections_per_block,
+ MHP_NONE);
if (ret)
goto out;
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 43d21f9e88b1..6ffa470e2984 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -772,8 +772,8 @@ static int __ref get_nid_for_pfn(unsigned long pfn)
return pfn_to_nid(pfn);
}
-static int do_register_memory_block_under_node(int nid,
- struct memory_block *mem_blk)
+static void do_register_memory_block_under_node(int nid,
+ struct memory_block *mem_blk)
{
int ret;
@@ -786,12 +786,19 @@ static int do_register_memory_block_under_node(int nid,
ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
&mem_blk->dev.kobj,
kobject_name(&mem_blk->dev.kobj));
- if (ret)
- return ret;
+ if (ret && ret != -EEXIST)
+ dev_err_ratelimited(&node_devices[nid]->dev,
+ "can't create link to %s in sysfs (%d)\n",
+ kobject_name(&mem_blk->dev.kobj), ret);
- return sysfs_create_link_nowarn(&mem_blk->dev.kobj,
+ ret = sysfs_create_link_nowarn(&mem_blk->dev.kobj,
&node_devices[nid]->dev.kobj,
kobject_name(&node_devices[nid]->dev.kobj));
+ if (ret && ret != -EEXIST)
+ dev_err_ratelimited(&mem_blk->dev,
+ "can't create link to %s in sysfs (%d)\n",
+ kobject_name(&node_devices[nid]->dev.kobj),
+ ret);
}
/* register memory section under specified node if it spans that node */
@@ -827,7 +834,8 @@ static int register_mem_block_under_node_early(struct memory_block *mem_blk,
if (page_nid != nid)
continue;
- return do_register_memory_block_under_node(nid, mem_blk);
+ do_register_memory_block_under_node(nid, mem_blk);
+ return 0;
}
/* mem section does not span the specified node */
return 0;
@@ -842,7 +850,8 @@ static int register_mem_block_under_node_hotplug(struct memory_block *mem_blk,
{
int nid = *(int *)arg;
- return do_register_memory_block_under_node(nid, mem_blk);
+ do_register_memory_block_under_node(nid, mem_blk);
+ return 0;
}
/*
@@ -860,8 +869,8 @@ void unregister_memory_block_under_nodes(struct memory_block *mem_blk)
kobject_name(&node_devices[mem_blk->nid]->dev.kobj));
}
-int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn,
- enum meminit_context context)
+void link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn,
+ enum meminit_context context)
{
walk_memory_blocks_func_t func;
@@ -870,9 +879,9 @@ int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn,
else
func = register_mem_block_under_node_early;
- return walk_memory_blocks(PFN_PHYS(start_pfn),
- PFN_PHYS(end_pfn - start_pfn), (void *)&nid,
- func);
+ walk_memory_blocks(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn),
+ (void *)&nid, func);
+ return;
}
#ifdef CONFIG_HUGETLBFS
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index adfc9352351d..501e9dacfff9 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -201,7 +201,7 @@ static inline void shrink_free_pagepool(struct xen_blkif_ring *ring, int num)
#define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page)))
-static int do_block_io_op(struct xen_blkif_ring *ring);
+static int do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags);
static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
struct blkif_request *req,
struct pending_req *pending_req);
@@ -612,6 +612,8 @@ int xen_blkif_schedule(void *arg)
struct xen_vbd *vbd = &blkif->vbd;
unsigned long timeout;
int ret;
+ bool do_eoi;
+ unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
set_freezable();
while (!kthread_should_stop()) {
@@ -636,16 +638,23 @@ int xen_blkif_schedule(void *arg)
if (timeout == 0)
goto purge_gnt_list;
+ do_eoi = ring->waiting_reqs;
+
ring->waiting_reqs = 0;
smp_mb(); /* clear flag *before* checking for work */
- ret = do_block_io_op(ring);
+ ret = do_block_io_op(ring, &eoi_flags);
if (ret > 0)
ring->waiting_reqs = 1;
if (ret == -EACCES)
wait_event_interruptible(ring->shutdown_wq,
kthread_should_stop());
+ if (do_eoi && !ring->waiting_reqs) {
+ xen_irq_lateeoi(ring->irq, eoi_flags);
+ eoi_flags |= XEN_EOI_FLAG_SPURIOUS;
+ }
+
purge_gnt_list:
if (blkif->vbd.feature_gnt_persistent &&
time_after(jiffies, ring->next_lru)) {
@@ -1121,7 +1130,7 @@ static void end_block_io_op(struct bio *bio)
* and transmute it to the block API to hand it over to the proper block disk.
*/
static int
-__do_block_io_op(struct xen_blkif_ring *ring)
+__do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags)
{
union blkif_back_rings *blk_rings = &ring->blk_rings;
struct blkif_request req;
@@ -1144,6 +1153,9 @@ __do_block_io_op(struct xen_blkif_ring *ring)
if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
break;
+ /* We've seen a request, so clear spurious eoi flag. */
+ *eoi_flags &= ~XEN_EOI_FLAG_SPURIOUS;
+
if (kthread_should_stop()) {
more_to_do = 1;
break;
@@ -1202,13 +1214,13 @@ done:
}
static int
-do_block_io_op(struct xen_blkif_ring *ring)
+do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags)
{
union blkif_back_rings *blk_rings = &ring->blk_rings;
int more_to_do;
do {
- more_to_do = __do_block_io_op(ring);
+ more_to_do = __do_block_io_op(ring, eoi_flags);
if (more_to_do)
break;
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index b9aa5d1ac10b..5e7c36d73dc6 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -246,9 +246,8 @@ static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
if (req_prod - rsp_prod > size)
goto fail;
- err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
- xen_blkif_be_int, 0,
- "blkif-backend", ring);
+ err = bind_interdomain_evtchn_to_irqhandler_lateeoi(blkif->domid,
+ evtchn, xen_blkif_be_int, 0, "blkif-backend", ring);
if (err < 0)
goto fail;
ring->irq = err;
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index bff3d4021c18..029403c18ca3 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1270,7 +1270,7 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
zram_slot_unlock(zram, index);
/* Should NEVER happen. Return bio error if it does. */
- if (unlikely(ret))
+ if (WARN_ON(ret))
pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
return ret;
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index a9af15e994cc..e439b43c19eb 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -885,12 +885,15 @@ static int powernv_cpufreq_reboot_notifier(struct notifier_block *nb,
unsigned long action, void *unused)
{
int cpu;
- struct cpufreq_policy cpu_policy;
+ struct cpufreq_policy *cpu_policy;
rebooting = true;
for_each_online_cpu(cpu) {
- cpufreq_get_policy(&cpu_policy, cpu);
- powernv_cpufreq_target_index(&cpu_policy, get_nominal_index());
+ cpu_policy = cpufreq_cpu_get(cpu);
+ if (!cpu_policy)
+ continue;
+ powernv_cpufreq_target_index(cpu_policy, get_nominal_index());
+ cpufreq_cpu_put(cpu_policy);
}
return NOTIFY_DONE;
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index addaa6e6718b..c32c600b3cf8 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -141,7 +141,7 @@ static int stop_loop(struct cpuidle_device *dev,
struct cpuidle_driver *drv,
int index)
{
- power9_idle_type(stop_psscr_table[index].val,
+ arch300_idle_type(stop_psscr_table[index].val,
stop_psscr_table[index].mask);
return index;
}
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 37593387164a..37da0c070a88 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -71,10 +71,26 @@ config ZCRYPT
help
Select this option if you want to enable support for
s390 cryptographic adapters like:
- + PCI-X Cryptographic Coprocessor (PCIXCC)
- + Crypto Express 2,3,4 or 5 Coprocessor (CEXxC)
- + Crypto Express 2,3,4 or 5 Accelerator (CEXxA)
- + Crypto Express 4 or 5 EP11 Coprocessor (CEXxP)
+ + Crypto Express 2 up to 7 Coprocessor (CEXxC)
+ + Crypto Express 2 up to 7 Accelerator (CEXxA)
+ + Crypto Express 4 up to 7 EP11 Coprocessor (CEXxP)
+
+config ZCRYPT_DEBUG
+ bool "Enable debug features for s390 cryptographic adapters"
+ default n
+ depends on DEBUG_KERNEL
+ depends on ZCRYPT
+ help
+ Say 'Y' here to enable some additional debug features on the
+ s390 cryptographic adapters driver.
+
+ There will be some more sysfs attributes displayed for ap cards
+ and queues and some flags on crypto requests are interpreted as
+ debugging messages to force error injection.
+
+ Do not enable on production level kernel build.
+
+ If unsure, say N.
config ZCRYPT_MULTIDEVNODES
bool "Support for multiple zcrypt device nodes"
diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c
index 6c933f2b604e..b4368c5b6a0c 100644
--- a/drivers/dax/kmem.c
+++ b/drivers/dax/kmem.c
@@ -35,11 +35,17 @@ static int dax_kmem_range(struct dev_dax *dev_dax, int i, struct range *r)
return 0;
}
+struct dax_kmem_data {
+ const char *res_name;
+ struct resource *res[];
+};
+
static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
{
struct device *dev = &dev_dax->dev;
+ struct dax_kmem_data *data;
+ int rc = -ENOMEM;
int i, mapped = 0;
- char *res_name;
int numa_node;
/*
@@ -55,14 +61,17 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
return -EINVAL;
}
- res_name = kstrdup(dev_name(dev), GFP_KERNEL);
- if (!res_name)
+ data = kzalloc(sizeof(*data) + sizeof(struct resource *) * dev_dax->nr_range, GFP_KERNEL);
+ if (!data)
return -ENOMEM;
+ data->res_name = kstrdup(dev_name(dev), GFP_KERNEL);
+ if (!data->res_name)
+ goto err_res_name;
+
for (i = 0; i < dev_dax->nr_range; i++) {
struct resource *res;
struct range range;
- int rc;
rc = dax_kmem_range(dev_dax, i, &range);
if (rc) {
@@ -72,7 +81,7 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
}
/* Region is permanently reserved if hotremove fails. */
- res = request_mem_region(range.start, range_len(&range), res_name);
+ res = request_mem_region(range.start, range_len(&range), data->res_name);
if (!res) {
dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve region\n",
i, range.start, range.end);
@@ -82,9 +91,10 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
*/
if (mapped)
continue;
- kfree(res_name);
- return -EBUSY;
+ rc = -EBUSY;
+ goto err_request_mem;
}
+ data->res[i] = res;
/*
* Set flags appropriate for System RAM. Leave ..._BUSY clear
@@ -99,23 +109,30 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
* this as RAM automatically.
*/
rc = add_memory_driver_managed(numa_node, range.start,
- range_len(&range), kmem_name);
+ range_len(&range), kmem_name, MHP_NONE);
if (rc) {
dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n",
i, range.start, range.end);
- release_mem_region(range.start, range_len(&range));
+ release_resource(res);
+ kfree(res);
+ data->res[i] = NULL;
if (mapped)
continue;
- kfree(res_name);
- return rc;
+ goto err_request_mem;
}
mapped++;
}
- dev_set_drvdata(dev, res_name);
+ dev_set_drvdata(dev, data);
return 0;
+
+err_request_mem:
+ kfree(data->res_name);
+err_res_name:
+ kfree(data);
+ return rc;
}
#ifdef CONFIG_MEMORY_HOTREMOVE
@@ -123,7 +140,7 @@ static int dev_dax_kmem_remove(struct dev_dax *dev_dax)
{
int i, success = 0;
struct device *dev = &dev_dax->dev;
- const char *res_name = dev_get_drvdata(dev);
+ struct dax_kmem_data *data = dev_get_drvdata(dev);
/*
* We have one shot for removing memory, if some memory blocks were not
@@ -142,7 +159,9 @@ static int dev_dax_kmem_remove(struct dev_dax *dev_dax)
rc = remove_memory(dev_dax->target_node, range.start,
range_len(&range));
if (rc == 0) {
- release_mem_region(range.start, range_len(&range));
+ release_resource(data->res[i]);
+ kfree(data->res[i]);
+ data->res[i] = NULL;
success++;
continue;
}
@@ -153,7 +172,8 @@ static int dev_dax_kmem_remove(struct dev_dax *dev_dax)
}
if (success >= dev_dax->nr_range) {
- kfree(res_name);
+ kfree(data->res_name);
+ kfree(data);
dev_set_drvdata(dev, NULL);
}
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index e84070b55463..edc279be3e59 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -46,7 +46,8 @@ EXPORT_SYMBOL_GPL(dax_read_unlock);
int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
pgoff_t *pgoff)
{
- phys_addr_t phys_off = (get_start_sect(bdev) + sector) * 512;
+ sector_t start_sect = bdev ? get_start_sect(bdev) : 0;
+ phys_addr_t phys_off = (start_sect + sector) * 512;
if (pgoff)
*pgoff = PHYS_PFN(phys_off);
diff --git a/drivers/firmware/broadcom/bcm47xx_sprom.c b/drivers/firmware/broadcom/bcm47xx_sprom.c
index 4787f86c8ac1..14fbcd11657c 100644
--- a/drivers/firmware/broadcom/bcm47xx_sprom.c
+++ b/drivers/firmware/broadcom/bcm47xx_sprom.c
@@ -27,6 +27,7 @@
*/
#include <linux/bcm47xx_nvram.h>
+#include <linux/bcm47xx_sprom.h>
#include <linux/bcma/bcma.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index da1887f72a51..36ec1f718893 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -106,7 +106,7 @@ config EFI_GENERIC_STUB
config EFI_ARMSTUB_DTB_LOADER
bool "Enable the DTB loader"
- depends on EFI_GENERIC_STUB
+ depends on EFI_GENERIC_STUB && !RISCV
default y
help
Select this config option to add support for the dtb= command
@@ -123,6 +123,7 @@ config EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER
bool "Enable the command line initrd loader" if !X86
depends on EFI_STUB && (EFI_GENERIC_STUB || X86)
default y
+ depends on !RISCV
help
Select this config option to add support for the initrd= command
line parameter, allowing an initrd that resides on the same volume
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index e8da782280b6..d6ca2da19339 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -36,6 +36,8 @@ fake_map-$(CONFIG_X86) += x86_fake_mem.o
arm-obj-$(CONFIG_EFI) := efi-init.o arm-runtime.o
obj-$(CONFIG_ARM) += $(arm-obj-y)
obj-$(CONFIG_ARM64) += $(arm-obj-y)
+riscv-obj-$(CONFIG_EFI) := efi-init.o riscv-runtime.o
+obj-$(CONFIG_RISCV) += $(riscv-obj-y)
obj-$(CONFIG_EFI_CAPSULE_LOADER) += capsule-loader.o
obj-$(CONFIG_EFI_EARLYCON) += earlycon.o
obj-$(CONFIG_UEFI_CPER_ARM) += cper-arm.o
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 039a9acab817..8a94388e38b3 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -23,6 +23,8 @@ cflags-$(CONFIG_ARM64) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \
cflags-$(CONFIG_ARM) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \
-fno-builtin -fpic \
$(call cc-option,-mno-single-pic-base)
+cflags-$(CONFIG_RISCV) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \
+ -fpic
cflags-$(CONFIG_EFI_GENERIC_STUB) += -I$(srctree)/scripts/dtc/libfdt
@@ -64,6 +66,7 @@ lib-$(CONFIG_EFI_GENERIC_STUB) += efi-stub.o fdt.o string.o \
lib-$(CONFIG_ARM) += arm32-stub.o
lib-$(CONFIG_ARM64) += arm64-stub.o
lib-$(CONFIG_X86) += x86-stub.o
+lib-$(CONFIG_RISCV) += riscv-stub.o
CFLAGS_arm32-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
# Even when -mbranch-protection=none is set, Clang will generate a
@@ -112,6 +115,13 @@ STUBCOPY_FLAGS-$(CONFIG_ARM64) += --prefix-alloc-sections=.init \
--prefix-symbols=__efistub_
STUBCOPY_RELOC-$(CONFIG_ARM64) := R_AARCH64_ABS
+# For RISC-V, we don't need anything special other than arm64. Keep all the
+# symbols in .init section and make sure that no absolute symbols references
+# doesn't exist.
+STUBCOPY_FLAGS-$(CONFIG_RISCV) += --prefix-alloc-sections=.init \
+ --prefix-symbols=__efistub_
+STUBCOPY_RELOC-$(CONFIG_RISCV) := R_RISCV_HI20
+
$(obj)/%.stub.o: $(obj)/%.o FORCE
$(call if_changed,stubcopy)
diff --git a/drivers/firmware/efi/libstub/efi-stub.c b/drivers/firmware/efi/libstub/efi-stub.c
index 311a16802dd6..914a343c7785 100644
--- a/drivers/firmware/efi/libstub/efi-stub.c
+++ b/drivers/firmware/efi/libstub/efi-stub.c
@@ -17,7 +17,10 @@
/*
* This is the base address at which to start allocating virtual memory ranges
- * for UEFI Runtime Services. This is in the low TTBR0 range so that we can use
+ * for UEFI Runtime Services.
+ *
+ * For ARM/ARM64:
+ * This is in the low TTBR0 range so that we can use
* any allocation we choose, and eliminate the risk of a conflict after kexec.
* The value chosen is the largest non-zero power of 2 suitable for this purpose
* both on 32-bit and 64-bit ARM CPUs, to maximize the likelihood that it can
@@ -25,6 +28,12 @@
* Since 32-bit ARM could potentially execute with a 1G/3G user/kernel split,
* map everything below 1 GB. (512 MB is a reasonable upper bound for the
* entire footprint of the UEFI runtime services memory regions)
+ *
+ * For RISC-V:
+ * There is no specific reason for which, this address (512MB) can't be used
+ * EFI runtime virtual address for RISC-V. It also helps to use EFI runtime
+ * services on both RV32/RV64. Keep the same runtime virtual address for RISC-V
+ * as well to minimize the code churn.
*/
#define EFI_RT_VIRTUAL_BASE SZ_512M
#define EFI_RT_VIRTUAL_SIZE SZ_512M
diff --git a/drivers/firmware/efi/libstub/riscv-stub.c b/drivers/firmware/efi/libstub/riscv-stub.c
new file mode 100644
index 000000000000..380e4e251399
--- /dev/null
+++ b/drivers/firmware/efi/libstub/riscv-stub.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+#include <linux/efi.h>
+#include <linux/libfdt.h>
+
+#include <asm/efi.h>
+#include <asm/sections.h>
+
+#include "efistub.h"
+
+/*
+ * RISC-V requires the kernel image to placed 2 MB aligned base for 64 bit and
+ * 4MB for 32 bit.
+ */
+#ifdef CONFIG_64BIT
+#define MIN_KIMG_ALIGN SZ_2M
+#else
+#define MIN_KIMG_ALIGN SZ_4M
+#endif
+
+typedef void __noreturn (*jump_kernel_func)(unsigned int, unsigned long);
+
+static u32 hartid;
+
+static u32 get_boot_hartid_from_fdt(void)
+{
+ const void *fdt;
+ int chosen_node, len;
+ const fdt32_t *prop;
+
+ fdt = get_efi_config_table(DEVICE_TREE_GUID);
+ if (!fdt)
+ return U32_MAX;
+
+ chosen_node = fdt_path_offset(fdt, "/chosen");
+ if (chosen_node < 0)
+ return U32_MAX;
+
+ prop = fdt_getprop((void *)fdt, chosen_node, "boot-hartid", &len);
+ if (!prop || len != sizeof(u32))
+ return U32_MAX;
+
+ return fdt32_to_cpu(*prop);
+}
+
+efi_status_t check_platform_features(void)
+{
+ hartid = get_boot_hartid_from_fdt();
+ if (hartid == U32_MAX) {
+ efi_err("/chosen/boot-hartid missing or invalid!\n");
+ return EFI_UNSUPPORTED;
+ }
+ return EFI_SUCCESS;
+}
+
+void __noreturn efi_enter_kernel(unsigned long entrypoint, unsigned long fdt,
+ unsigned long fdt_size)
+{
+ unsigned long stext_offset = _start_kernel - _start;
+ unsigned long kernel_entry = entrypoint + stext_offset;
+ jump_kernel_func jump_kernel = (jump_kernel_func)kernel_entry;
+
+ /*
+ * Jump to real kernel here with following constraints.
+ * 1. MMU should be disabled.
+ * 2. a0 should contain hartid
+ * 3. a1 should DT address
+ */
+ csr_write(CSR_SATP, 0);
+ jump_kernel(hartid, fdt);
+}
+
+efi_status_t handle_kernel_image(unsigned long *image_addr,
+ unsigned long *image_size,
+ unsigned long *reserve_addr,
+ unsigned long *reserve_size,
+ efi_loaded_image_t *image)
+{
+ unsigned long kernel_size = 0;
+ unsigned long preferred_addr;
+ efi_status_t status;
+
+ kernel_size = _edata - _start;
+ *image_addr = (unsigned long)_start;
+ *image_size = kernel_size + (_end - _edata);
+
+ /*
+ * RISC-V kernel maps PAGE_OFFSET virtual address to the same physical
+ * address where kernel is booted. That's why kernel should boot from
+ * as low as possible to avoid wastage of memory. Currently, dram_base
+ * is occupied by the firmware. So the preferred address for kernel to
+ * boot is next aligned address. If preferred address is not available,
+ * relocate_kernel will fall back to efi_low_alloc_above to allocate
+ * lowest possible memory region as long as the address and size meets
+ * the alignment constraints.
+ */
+ preferred_addr = MIN_KIMG_ALIGN;
+ status = efi_relocate_kernel(image_addr, kernel_size, *image_size,
+ preferred_addr, MIN_KIMG_ALIGN, 0x0);
+
+ if (status != EFI_SUCCESS) {
+ efi_err("Failed to relocate kernel\n");
+ *image_size = 0;
+ }
+ return status;
+}
diff --git a/drivers/firmware/efi/riscv-runtime.c b/drivers/firmware/efi/riscv-runtime.c
new file mode 100644
index 000000000000..d28e715d2bcc
--- /dev/null
+++ b/drivers/firmware/efi/riscv-runtime.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Extensible Firmware Interface
+ *
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ *
+ * Based on Extensible Firmware Interface Specification version 2.4
+ * Adapted from drivers/firmware/efi/arm-runtime.c
+ *
+ */
+
+#include <linux/dmi.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/memblock.h>
+#include <linux/mm_types.h>
+#include <linux/preempt.h>
+#include <linux/rbtree.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/pgtable.h>
+
+#include <asm/cacheflush.h>
+#include <asm/efi.h>
+#include <asm/mmu.h>
+#include <asm/pgalloc.h>
+
+static bool __init efi_virtmap_init(void)
+{
+ efi_memory_desc_t *md;
+
+ efi_mm.pgd = pgd_alloc(&efi_mm);
+ mm_init_cpumask(&efi_mm);
+ init_new_context(NULL, &efi_mm);
+
+ for_each_efi_memory_desc(md) {
+ phys_addr_t phys = md->phys_addr;
+ int ret;
+
+ if (!(md->attribute & EFI_MEMORY_RUNTIME))
+ continue;
+ if (md->virt_addr == 0)
+ return false;
+
+ ret = efi_create_mapping(&efi_mm, md);
+ if (ret) {
+ pr_warn(" EFI remap %pa: failed to create mapping (%d)\n",
+ &phys, ret);
+ return false;
+ }
+ }
+
+ if (efi_memattr_apply_permissions(&efi_mm, efi_set_mapping_permissions))
+ return false;
+
+ return true;
+}
+
+/*
+ * Enable the UEFI Runtime Services if all prerequisites are in place, i.e.,
+ * non-early mapping of the UEFI system table and virtual mappings for all
+ * EFI_MEMORY_RUNTIME regions.
+ */
+static int __init riscv_enable_runtime_services(void)
+{
+ u64 mapsize;
+
+ if (!efi_enabled(EFI_BOOT)) {
+ pr_info("EFI services will not be available.\n");
+ return 0;
+ }
+
+ efi_memmap_unmap();
+
+ mapsize = efi.memmap.desc_size * efi.memmap.nr_map;
+
+ if (efi_memmap_init_late(efi.memmap.phys_map, mapsize)) {
+ pr_err("Failed to remap EFI memory map\n");
+ return 0;
+ }
+
+ if (efi_soft_reserve_enabled()) {
+ efi_memory_desc_t *md;
+
+ for_each_efi_memory_desc(md) {
+ int md_size = md->num_pages << EFI_PAGE_SHIFT;
+ struct resource *res;
+
+ if (!(md->attribute & EFI_MEMORY_SP))
+ continue;
+
+ res = kzalloc(sizeof(*res), GFP_KERNEL);
+ if (WARN_ON(!res))
+ break;
+
+ res->start = md->phys_addr;
+ res->end = md->phys_addr + md_size - 1;
+ res->name = "Soft Reserved";
+ res->flags = IORESOURCE_MEM;
+ res->desc = IORES_DESC_SOFT_RESERVED;
+
+ insert_resource(&iomem_resource, res);
+ }
+ }
+
+ if (efi_runtime_disabled()) {
+ pr_info("EFI runtime services will be disabled.\n");
+ return 0;
+ }
+
+ if (efi_enabled(EFI_RUNTIME_SERVICES)) {
+ pr_info("EFI runtime services access via paravirt.\n");
+ return 0;
+ }
+
+ pr_info("Remapping and enabling EFI services.\n");
+
+ if (!efi_virtmap_init()) {
+ pr_err("UEFI virtual mapping missing or invalid -- runtime services will not be available\n");
+ return -ENOMEM;
+ }
+
+ /* Set up runtime services function pointers */
+ efi_native_runtime_setup();
+ set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+
+ return 0;
+}
+early_initcall(riscv_enable_runtime_services);
+
+void efi_virtmap_load(void)
+{
+ preempt_disable();
+ switch_mm(current->active_mm, &efi_mm, NULL);
+}
+
+void efi_virtmap_unload(void)
+{
+ switch_mm(&efi_mm, current->active_mm, NULL);
+ preempt_enable();
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 495c3d7bb2b2..f3b7287e84c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -68,6 +68,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
mutex_init(&adev->vcn.vcn_pg_lock);
+ mutex_init(&adev->vcn.vcn1_jpeg1_workaround);
atomic_set(&adev->vcn.total_submission_cnt, 0);
for (i = 0; i < adev->vcn.num_vcn_inst; i++)
atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);
@@ -237,6 +238,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
}
release_firmware(adev->vcn.fw);
+ mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround);
mutex_destroy(&adev->vcn.vcn_pg_lock);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 7a9b804bc988..17691158f783 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -220,6 +220,7 @@ struct amdgpu_vcn {
struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES];
struct amdgpu_vcn_reg internal;
struct mutex vcn_pg_lock;
+ struct mutex vcn1_jpeg1_workaround;
atomic_t total_submission_cnt;
unsigned harvest_config;
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
index bc300283b6ab..c600b61b5f45 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
@@ -33,6 +33,7 @@
static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev);
+static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring);
static void jpeg_v1_0_decode_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val)
{
@@ -564,8 +565,8 @@ static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = {
.insert_start = jpeg_v1_0_decode_ring_insert_start,
.insert_end = jpeg_v1_0_decode_ring_insert_end,
.pad_ib = amdgpu_ring_generic_pad_ib,
- .begin_use = vcn_v1_0_ring_begin_use,
- .end_use = amdgpu_vcn_ring_end_use,
+ .begin_use = jpeg_v1_0_ring_begin_use,
+ .end_use = vcn_v1_0_ring_end_use,
.emit_wreg = jpeg_v1_0_decode_ring_emit_wreg,
.emit_reg_wait = jpeg_v1_0_decode_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
@@ -586,3 +587,22 @@ static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev)
{
adev->jpeg.inst->irq.funcs = &jpeg_v1_0_irq_funcs;
}
+
+static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
+ int cnt = 0;
+
+ mutex_lock(&adev->vcn.vcn1_jpeg1_workaround);
+
+ if (amdgpu_fence_wait_empty(&adev->vcn.inst->ring_dec))
+ DRM_ERROR("JPEG dec: vcn dec ring may not be empty\n");
+
+ for (cnt = 0; cnt < adev->vcn.num_enc_rings; cnt++) {
+ if (amdgpu_fence_wait_empty(&adev->vcn.inst->ring_enc[cnt]))
+ DRM_ERROR("JPEG dec: vcn enc ring[%d] may not be empty\n", cnt);
+ }
+
+ vcn_v1_0_set_pg_for_begin_use(ring, set_clocks);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 73699eafb51e..86e1ef732ebe 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -54,6 +54,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
int inst_idx, struct dpg_pause_state *new_state);
static void vcn_v1_0_idle_work_handler(struct work_struct *work);
+static void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring);
/**
* vcn_v1_0_early_init - set function pointers
@@ -1804,11 +1805,24 @@ static void vcn_v1_0_idle_work_handler(struct work_struct *work)
}
}
-void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring)
+static void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring)
{
- struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_device *adev = ring->adev;
bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
+ mutex_lock(&adev->vcn.vcn1_jpeg1_workaround);
+
+ if (amdgpu_fence_wait_empty(&ring->adev->jpeg.inst->ring_dec))
+ DRM_ERROR("VCN dec: jpeg dec ring may not be empty\n");
+
+ vcn_v1_0_set_pg_for_begin_use(ring, set_clocks);
+
+}
+
+void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks)
+{
+ struct amdgpu_device *adev = ring->adev;
+
if (set_clocks) {
amdgpu_gfx_off_ctrl(adev, false);
if (adev->pm.dpm_enabled)
@@ -1844,6 +1858,12 @@ void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring)
}
}
+void vcn_v1_0_ring_end_use(struct amdgpu_ring *ring)
+{
+ schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+ mutex_unlock(&ring->adev->vcn.vcn1_jpeg1_workaround);
+}
+
static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
.name = "vcn_v1_0",
.early_init = vcn_v1_0_early_init,
@@ -1891,7 +1911,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
.insert_end = vcn_v1_0_dec_ring_insert_end,
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = vcn_v1_0_ring_begin_use,
- .end_use = amdgpu_vcn_ring_end_use,
+ .end_use = vcn_v1_0_ring_end_use,
.emit_wreg = vcn_v1_0_dec_ring_emit_wreg,
.emit_reg_wait = vcn_v1_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
@@ -1923,7 +1943,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
.insert_end = vcn_v1_0_enc_ring_insert_end,
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = vcn_v1_0_ring_begin_use,
- .end_use = amdgpu_vcn_ring_end_use,
+ .end_use = vcn_v1_0_ring_end_use,
.emit_wreg = vcn_v1_0_enc_ring_emit_wreg,
.emit_reg_wait = vcn_v1_0_enc_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h
index f67d7391fc21..1f1cc7f0ece7 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h
@@ -24,7 +24,8 @@
#ifndef __VCN_V1_0_H__
#define __VCN_V1_0_H__
-void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring);
+void vcn_v1_0_ring_end_use(struct amdgpu_ring *ring);
+void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks);
extern const struct amdgpu_ip_block_version vcn_v1_0_ip_block;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index d2981524dba0..5e2254b9e931 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1426,5 +1426,5 @@ int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
*/
void kfd_destroy_crat_image(void *crat_image)
{
- kfree(crat_image);
+ kvfree(crat_image);
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 9c1e003d9c29..34f6369bf51f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -149,6 +149,8 @@ struct amdgpu_dm_backlight_caps {
* @cached_state: Caches device atomic state for suspend/resume
* @cached_dc_state: Cached state of content streams
* @compressor: Frame buffer compression buffer. See &struct dm_comressor_info
+ * @force_timing_sync: set via debugfs. When set, indicates that all connected
+ * displays will be forced to synchronize.
*/
struct amdgpu_display_manager {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index db741e47d194..eee19edeeee5 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -647,7 +647,7 @@ static void try_disable_dsc(struct drm_atomic_state *state,
for (i = 0; i < count; i++) {
if (vars[i].dsc_enabled
&& vars[i].bpp_x16 == params[i].bw_range.max_target_bpp_x16
- && !params[i].clock_force_enable == DSC_CLK_FORCE_DEFAULT) {
+ && params[i].clock_force_enable == DSC_CLK_FORCE_DEFAULT) {
kbps_increase[i] = params[i].bw_range.stream_kbps - params[i].bw_range.max_kbps;
tried[i] = false;
remaining_to_try += 1;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 2a725a5fba40..1eb29c362122 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -848,7 +848,7 @@ static void disable_vbios_mode_if_required(
struct dc *dc,
struct dc_state *context)
{
- unsigned int i;
+ unsigned int i, j;
/* check if timing_changed, disable stream*/
for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -872,10 +872,10 @@ static void disable_vbios_mode_if_required(
enc_inst = link->link_enc->funcs->get_dig_frontend(link->link_enc);
if (enc_inst != ENGINE_ID_UNKNOWN) {
- for (i = 0; i < dc->res_pool->stream_enc_count; i++) {
- if (dc->res_pool->stream_enc[i]->id == enc_inst) {
- tg_inst = dc->res_pool->stream_enc[i]->funcs->dig_source_otg(
- dc->res_pool->stream_enc[i]);
+ for (j = 0; j < dc->res_pool->stream_enc_count; j++) {
+ if (dc->res_pool->stream_enc[j]->id == enc_inst) {
+ tg_inst = dc->res_pool->stream_enc[j]->funcs->dig_source_otg(
+ dc->res_pool->stream_enc[j]);
break;
}
}
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
index 3bf8be4d107b..1e8919b0acdb 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
@@ -2883,7 +2883,7 @@ static int smu7_vblank_too_short(struct pp_hwmgr *hwmgr,
if (hwmgr->is_kicker)
switch_limit_us = data->is_memory_gddr5 ? 450 : 150;
else
- switch_limit_us = data->is_memory_gddr5 ? 190 : 150;
+ switch_limit_us = data->is_memory_gddr5 ? 200 : 150;
break;
case CHIP_VEGAM:
switch_limit_us = 30;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 942793947b4b..fc4f95fa87cf 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -417,6 +417,9 @@ static int smu_early_init(void *handle)
smu->pm_enabled = !!amdgpu_dpm;
smu->is_apu = false;
mutex_init(&smu->mutex);
+ mutex_init(&smu->smu_baco.mutex);
+ smu->smu_baco.state = SMU_BACO_STATE_EXIT;
+ smu->smu_baco.platform_support = false;
return smu_set_funcs(adev);
}
@@ -795,10 +798,6 @@ static int smu_sw_init(void *handle)
bitmap_zero(smu->smu_feature.enabled, SMU_FEATURE_MAX);
bitmap_zero(smu->smu_feature.allowed, SMU_FEATURE_MAX);
- mutex_init(&smu->smu_baco.mutex);
- smu->smu_baco.state = SMU_BACO_STATE_EXIT;
- smu->smu_baco.platform_support = false;
-
mutex_init(&smu->sensor_lock);
mutex_init(&smu->metrics_lock);
mutex_init(&smu->message_lock);
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 11fe9ff76fd5..d6808f678db5 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -806,30 +806,27 @@ static const struct dma_buf_ops drm_gem_prime_dmabuf_ops = {
struct sg_table *drm_prime_pages_to_sg(struct drm_device *dev,
struct page **pages, unsigned int nr_pages)
{
- struct sg_table *sg = NULL;
+ struct sg_table *sg;
+ struct scatterlist *sge;
size_t max_segment = 0;
- int ret;
sg = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
- if (!sg) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!sg)
+ return ERR_PTR(-ENOMEM);
if (dev)
max_segment = dma_max_mapping_size(dev->dev);
if (max_segment == 0 || max_segment > SCATTERLIST_MAX_SEGMENT)
max_segment = SCATTERLIST_MAX_SEGMENT;
- ret = __sg_alloc_table_from_pages(sg, pages, nr_pages, 0,
+ sge = __sg_alloc_table_from_pages(sg, pages, nr_pages, 0,
nr_pages << PAGE_SHIFT,
- max_segment, GFP_KERNEL);
- if (ret)
- goto out;
-
+ max_segment,
+ NULL, 0, GFP_KERNEL);
+ if (IS_ERR(sge)) {
+ kfree(sg);
+ sg = ERR_CAST(sge);
+ }
return sg;
-out:
- kfree(sg);
- return ERR_PTR(ret);
}
EXPORT_SYMBOL(drm_prime_pages_to_sg);
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 9afa5c4a6bf0..1e1cb245fca7 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -25,6 +25,7 @@ config DRM_I915
select CRC32
select SND_HDA_I915 if SND_HDA_CORE
select CEC_CORE if CEC_NOTIFIER
+ select VMAP_PFN
help
Choose this option if you have a system that has "Intel Graphics
Media Accelerator" or "HD Graphics" integrated graphics,
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 4d06178cd76c..cdcb7b1034ae 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -2742,7 +2742,7 @@ tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int link_clock,
u32 n_entries, val, ln, dpcnt_mask, dpcnt_val;
int rate = 0;
- if (type == INTEL_OUTPUT_HDMI) {
+ if (type != INTEL_OUTPUT_HDMI) {
struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
rate = intel_dp->link_rate;
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 46496b9b1b02..abffab10d54a 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -4093,8 +4093,7 @@ static int skl_check_ccs_aux_surface(struct intel_plane_state *plane_state)
int skl_check_plane_surface(struct intel_plane_state *plane_state)
{
const struct drm_framebuffer *fb = plane_state->hw.fb;
- int ret;
- bool needs_aux = false;
+ int ret, i;
ret = intel_plane_compute_gtt(plane_state);
if (ret)
@@ -4108,7 +4107,6 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state)
* it.
*/
if (is_ccs_modifier(fb->modifier)) {
- needs_aux = true;
ret = skl_check_ccs_aux_surface(plane_state);
if (ret)
return ret;
@@ -4116,20 +4114,15 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state)
if (intel_format_info_is_yuv_semiplanar(fb->format,
fb->modifier)) {
- needs_aux = true;
ret = skl_check_nv12_aux_surface(plane_state);
if (ret)
return ret;
}
- if (!needs_aux) {
- int i;
-
- for (i = 1; i < fb->format->num_planes; i++) {
- plane_state->color_plane[i].offset = ~0xfff;
- plane_state->color_plane[i].x = 0;
- plane_state->color_plane[i].y = 0;
- }
+ for (i = fb->format->num_planes; i < ARRAY_SIZE(plane_state->color_plane); i++) {
+ plane_state->color_plane[i].offset = ~0xfff;
+ plane_state->color_plane[i].x = 0;
+ plane_state->color_plane[i].y = 0;
}
ret = skl_check_main_surface(plane_state);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index d6eeefab3d01..f60ca6dc911f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -162,8 +162,6 @@ static void unmap_object(struct drm_i915_gem_object *obj, void *ptr)
{
if (is_vmalloc_addr(ptr))
vunmap(ptr);
- else
- kunmap(kmap_to_page(ptr));
}
struct sg_table *
@@ -234,34 +232,21 @@ unlock:
return err;
}
-static inline pte_t iomap_pte(resource_size_t base,
- dma_addr_t offset,
- pgprot_t prot)
-{
- return pte_mkspecial(pfn_pte((base + offset) >> PAGE_SHIFT, prot));
-}
-
/* The 'mapping' part of i915_gem_object_pin_map() below */
-static void *i915_gem_object_map(struct drm_i915_gem_object *obj,
- enum i915_map_type type)
+static void *i915_gem_object_map_page(struct drm_i915_gem_object *obj,
+ enum i915_map_type type)
{
- unsigned long n_pte = obj->base.size >> PAGE_SHIFT;
- struct sg_table *sgt = obj->mm.pages;
- pte_t *stack[32], **mem;
- struct vm_struct *area;
+ unsigned long n_pages = obj->base.size >> PAGE_SHIFT, i;
+ struct page *stack[32], **pages = stack, *page;
+ struct sgt_iter iter;
pgprot_t pgprot;
+ void *vaddr;
- if (!i915_gem_object_has_struct_page(obj) && type != I915_MAP_WC)
- return NULL;
-
- if (GEM_WARN_ON(type == I915_MAP_WC &&
- !static_cpu_has(X86_FEATURE_PAT)))
- return NULL;
-
- /* A single page can always be kmapped */
- if (n_pte == 1 && type == I915_MAP_WB) {
- struct page *page = sg_page(sgt->sgl);
-
+ switch (type) {
+ default:
+ MISSING_CASE(type);
+ fallthrough; /* to use PAGE_KERNEL anyway */
+ case I915_MAP_WB:
/*
* On 32b, highmem using a finite set of indirect PTE (i.e.
* vmap) to provide virtual mappings of the high pages.
@@ -277,33 +262,10 @@ static void *i915_gem_object_map(struct drm_i915_gem_object *obj,
* forever.
*
* So if the page is beyond the 32b boundary, make an explicit
- * vmap. On 64b, this check will be optimised away as we can
- * directly kmap any page on the system.
+ * vmap.
*/
- if (!PageHighMem(page))
- return kmap(page);
- }
-
- mem = stack;
- if (n_pte > ARRAY_SIZE(stack)) {
- /* Too big for stack -- allocate temporary array instead */
- mem = kvmalloc_array(n_pte, sizeof(*mem), GFP_KERNEL);
- if (!mem)
- return NULL;
- }
-
- area = alloc_vm_area(obj->base.size, mem);
- if (!area) {
- if (mem != stack)
- kvfree(mem);
- return NULL;
- }
-
- switch (type) {
- default:
- MISSING_CASE(type);
- fallthrough; /* to use PAGE_KERNEL anyway */
- case I915_MAP_WB:
+ if (n_pages == 1 && !PageHighMem(sg_page(obj->mm.pages->sgl)))
+ return page_address(sg_page(obj->mm.pages->sgl));
pgprot = PAGE_KERNEL;
break;
case I915_MAP_WC:
@@ -311,30 +273,50 @@ static void *i915_gem_object_map(struct drm_i915_gem_object *obj,
break;
}
- if (i915_gem_object_has_struct_page(obj)) {
- struct sgt_iter iter;
- struct page *page;
- pte_t **ptes = mem;
+ if (n_pages > ARRAY_SIZE(stack)) {
+ /* Too big for stack -- allocate temporary array instead */
+ pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
+ if (!pages)
+ return NULL;
+ }
- for_each_sgt_page(page, iter, sgt)
- **ptes++ = mk_pte(page, pgprot);
- } else {
- resource_size_t iomap;
- struct sgt_iter iter;
- pte_t **ptes = mem;
- dma_addr_t addr;
+ i = 0;
+ for_each_sgt_page(page, iter, obj->mm.pages)
+ pages[i++] = page;
+ vaddr = vmap(pages, n_pages, 0, pgprot);
+ if (pages != stack)
+ kvfree(pages);
+ return vaddr;
+}
- iomap = obj->mm.region->iomap.base;
- iomap -= obj->mm.region->region.start;
+static void *i915_gem_object_map_pfn(struct drm_i915_gem_object *obj,
+ enum i915_map_type type)
+{
+ resource_size_t iomap = obj->mm.region->iomap.base -
+ obj->mm.region->region.start;
+ unsigned long n_pfn = obj->base.size >> PAGE_SHIFT;
+ unsigned long stack[32], *pfns = stack, i;
+ struct sgt_iter iter;
+ dma_addr_t addr;
+ void *vaddr;
+
+ if (type != I915_MAP_WC)
+ return NULL;
- for_each_sgt_daddr(addr, iter, sgt)
- **ptes++ = iomap_pte(iomap, addr, pgprot);
+ if (n_pfn > ARRAY_SIZE(stack)) {
+ /* Too big for stack -- allocate temporary array instead */
+ pfns = kvmalloc_array(n_pfn, sizeof(*pfns), GFP_KERNEL);
+ if (!pfns)
+ return NULL;
}
- if (mem != stack)
- kvfree(mem);
-
- return area->addr;
+ i = 0;
+ for_each_sgt_daddr(addr, iter, obj->mm.pages)
+ pfns[i++] = (iomap + addr) >> PAGE_SHIFT;
+ vaddr = vmap_pfn(pfns, n_pfn, pgprot_writecombine(PAGE_KERNEL_IO));
+ if (pfns != stack)
+ kvfree(pfns);
+ return vaddr;
}
/* get, pin, and map the pages of the object into kernel space */
@@ -386,7 +368,13 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
}
if (!ptr) {
- ptr = i915_gem_object_map(obj, type);
+ if (GEM_WARN_ON(type == I915_MAP_WC &&
+ !static_cpu_has(X86_FEATURE_PAT)))
+ ptr = NULL;
+ else if (i915_gem_object_has_struct_page(obj))
+ ptr = i915_gem_object_map_page(obj, type);
+ else
+ ptr = i915_gem_object_map_pfn(obj, type);
if (!ptr) {
err = -ENOMEM;
goto err_unpin;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 12b30075134a..f2eaed6aca3d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -403,6 +403,7 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
unsigned int max_segment = i915_sg_segment_size();
struct sg_table *st;
unsigned int sg_page_sizes;
+ struct scatterlist *sg;
int ret;
st = kmalloc(sizeof(*st), GFP_KERNEL);
@@ -410,13 +411,12 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
return ERR_PTR(-ENOMEM);
alloc_table:
- ret = __sg_alloc_table_from_pages(st, pvec, num_pages,
- 0, num_pages << PAGE_SHIFT,
- max_segment,
- GFP_KERNEL);
- if (ret) {
+ sg = __sg_alloc_table_from_pages(st, pvec, num_pages, 0,
+ num_pages << PAGE_SHIFT, max_segment,
+ NULL, 0, GFP_KERNEL);
+ if (IS_ERR(sg)) {
kfree(st);
- return ERR_PTR(ret);
+ return ERR_CAST(sg);
}
ret = i915_gem_gtt_prepare_pages(obj, st);
diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c
index 43c7acbdc79d..f011ea42487e 100644
--- a/drivers/gpu/drm/i915/gt/shmem_utils.c
+++ b/drivers/gpu/drm/i915/gt/shmem_utils.c
@@ -49,80 +49,40 @@ struct file *shmem_create_from_object(struct drm_i915_gem_object *obj)
return file;
}
-static size_t shmem_npte(struct file *file)
-{
- return file->f_mapping->host->i_size >> PAGE_SHIFT;
-}
-
-static void __shmem_unpin_map(struct file *file, void *ptr, size_t n_pte)
-{
- unsigned long pfn;
-
- vunmap(ptr);
-
- for (pfn = 0; pfn < n_pte; pfn++) {
- struct page *page;
-
- page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
- GFP_KERNEL);
- if (!WARN_ON(IS_ERR(page))) {
- put_page(page);
- put_page(page);
- }
- }
-}
-
void *shmem_pin_map(struct file *file)
{
- const size_t n_pte = shmem_npte(file);
- pte_t *stack[32], **ptes, **mem;
- struct vm_struct *area;
- unsigned long pfn;
-
- mem = stack;
- if (n_pte > ARRAY_SIZE(stack)) {
- mem = kvmalloc_array(n_pte, sizeof(*mem), GFP_KERNEL);
- if (!mem)
- return NULL;
- }
+ struct page **pages;
+ size_t n_pages, i;
+ void *vaddr;
- area = alloc_vm_area(n_pte << PAGE_SHIFT, mem);
- if (!area) {
- if (mem != stack)
- kvfree(mem);
+ n_pages = file->f_mapping->host->i_size >> PAGE_SHIFT;
+ pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
+ if (!pages)
return NULL;
- }
- ptes = mem;
- for (pfn = 0; pfn < n_pte; pfn++) {
- struct page *page;
-
- page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
- GFP_KERNEL);
- if (IS_ERR(page))
+ for (i = 0; i < n_pages; i++) {
+ pages[i] = shmem_read_mapping_page_gfp(file->f_mapping, i,
+ GFP_KERNEL);
+ if (IS_ERR(pages[i]))
goto err_page;
-
- **ptes++ = mk_pte(page, PAGE_KERNEL);
}
- if (mem != stack)
- kvfree(mem);
-
+ vaddr = vmap(pages, n_pages, VM_MAP_PUT_PAGES, PAGE_KERNEL);
+ if (!vaddr)
+ goto err_page;
mapping_set_unevictable(file->f_mapping);
- return area->addr;
-
+ return vaddr;
err_page:
- if (mem != stack)
- kvfree(mem);
-
- __shmem_unpin_map(file, area->addr, pfn);
+ while (--i >= 0)
+ put_page(pages[i]);
+ kvfree(pages);
return NULL;
}
void shmem_unpin_map(struct file *file, void *ptr)
{
mapping_clear_unevictable(file->f_mapping);
- __shmem_unpin_map(file, ptr, shmem_npte(file));
+ vfree(ptr);
}
static int __shmem_rw(struct file *file, loff_t off,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
index 7f0310441da1..73116ec70ba5 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
@@ -432,6 +432,7 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
int ret = 0;
static size_t sgl_size;
static size_t sgt_size;
+ struct scatterlist *sg;
if (vmw_tt->mapped)
return 0;
@@ -454,13 +455,15 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
if (unlikely(ret != 0))
return ret;
- ret = __sg_alloc_table_from_pages
- (&vmw_tt->sgt, vsgt->pages, vsgt->num_pages, 0,
- (unsigned long) vsgt->num_pages << PAGE_SHIFT,
- dma_get_max_seg_size(dev_priv->dev->dev),
- GFP_KERNEL);
- if (unlikely(ret != 0))
+ sg = __sg_alloc_table_from_pages(&vmw_tt->sgt, vsgt->pages,
+ vsgt->num_pages, 0,
+ (unsigned long) vsgt->num_pages << PAGE_SHIFT,
+ dma_get_max_seg_size(dev_priv->dev->dev),
+ NULL, 0, GFP_KERNEL);
+ if (IS_ERR(sg)) {
+ ret = PTR_ERR(sg);
goto out_sg_alloc_fail;
+ }
if (vsgt->num_pages > vmw_tt->sgt.orig_nents) {
uint64_t over_alloc =
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 32e3bc0aa665..b64d2efbefe7 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -726,7 +726,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
ret = add_memory(nid, PFN_PHYS((start_pfn)),
- (HA_CHUNK << PAGE_SHIFT));
+ (HA_CHUNK << PAGE_SHIFT), MEMHP_MERGE_RESOURCE);
if (ret) {
pr_err("hot_add memory failed error is %d\n", ret);
diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c
index 97f2e29265da..1c6b78ad5ade 100644
--- a/drivers/i3c/master.c
+++ b/drivers/i3c/master.c
@@ -1373,7 +1373,9 @@ static int i3c_master_reattach_i3c_dev(struct i3c_dev_desc *dev,
enum i3c_addr_slot_status status;
int ret;
- if (dev->info.dyn_addr != old_dyn_addr) {
+ if (dev->info.dyn_addr != old_dyn_addr &&
+ (!dev->boardinfo ||
+ dev->info.dyn_addr != dev->boardinfo->init_dyn_addr)) {
status = i3c_bus_get_addr_slot_status(&master->bus,
dev->info.dyn_addr);
if (status != I3C_ADDR_SLOT_FREE)
@@ -1432,33 +1434,49 @@ static void i3c_master_detach_i2c_dev(struct i2c_dev_desc *dev)
master->ops->detach_i2c_dev(dev);
}
-static void i3c_master_pre_assign_dyn_addr(struct i3c_dev_desc *dev)
+static int i3c_master_early_i3c_dev_add(struct i3c_master_controller *master,
+ struct i3c_dev_boardinfo *boardinfo)
{
- struct i3c_master_controller *master = i3c_dev_get_master(dev);
+ struct i3c_device_info info = {
+ .static_addr = boardinfo->static_addr,
+ };
+ struct i3c_dev_desc *i3cdev;
int ret;
- if (!dev->boardinfo || !dev->boardinfo->init_dyn_addr ||
- !dev->boardinfo->static_addr)
- return;
+ i3cdev = i3c_master_alloc_i3c_dev(master, &info);
+ if (IS_ERR(i3cdev))
+ return -ENOMEM;
+
+ i3cdev->boardinfo = boardinfo;
+
+ ret = i3c_master_attach_i3c_dev(master, i3cdev);
+ if (ret)
+ goto err_free_dev;
- ret = i3c_master_setdasa_locked(master, dev->info.static_addr,
- dev->boardinfo->init_dyn_addr);
+ ret = i3c_master_setdasa_locked(master, i3cdev->info.static_addr,
+ i3cdev->boardinfo->init_dyn_addr);
if (ret)
- return;
+ goto err_detach_dev;
- dev->info.dyn_addr = dev->boardinfo->init_dyn_addr;
- ret = i3c_master_reattach_i3c_dev(dev, 0);
+ i3cdev->info.dyn_addr = i3cdev->boardinfo->init_dyn_addr;
+ ret = i3c_master_reattach_i3c_dev(i3cdev, 0);
if (ret)
goto err_rstdaa;
- ret = i3c_master_retrieve_dev_info(dev);
+ ret = i3c_master_retrieve_dev_info(i3cdev);
if (ret)
goto err_rstdaa;
- return;
+ return 0;
err_rstdaa:
- i3c_master_rstdaa_locked(master, dev->boardinfo->init_dyn_addr);
+ i3c_master_rstdaa_locked(master, i3cdev->boardinfo->init_dyn_addr);
+err_detach_dev:
+ i3c_master_detach_i3c_dev(i3cdev);
+err_free_dev:
+ i3c_master_free_i3c_dev(i3cdev);
+
+ return ret;
}
static void
@@ -1625,8 +1643,8 @@ static void i3c_master_detach_free_devs(struct i3c_master_controller *master)
* This function is following all initialisation steps described in the I3C
* specification:
*
- * 1. Attach I2C and statically defined I3C devs to the master so that the
- * master can fill its internal device table appropriately
+ * 1. Attach I2C devs to the master so that the master can fill its internal
+ * device table appropriately
*
* 2. Call &i3c_master_controller_ops->bus_init() method to initialize
* the master controller. That's usually where the bus mode is selected
@@ -1638,8 +1656,10 @@ static void i3c_master_detach_free_devs(struct i3c_master_controller *master)
*
* 4. Disable all slave events.
*
- * 5. Pre-assign dynamic addresses requested by the FW with SETDASA for I3C
- * devices that have a static address
+ * 5. Reserve address slots for I3C devices with init_dyn_addr. And if devices
+ * also have static_addr, try to pre-assign dynamic addresses requested by
+ * the FW with SETDASA and attach corresponding statically defined I3C
+ * devices to the master.
*
* 6. Do a DAA (Dynamic Address Assignment) to assign dynamic addresses to all
* remaining I3C devices
@@ -1653,7 +1673,6 @@ static int i3c_master_bus_init(struct i3c_master_controller *master)
enum i3c_addr_slot_status status;
struct i2c_dev_boardinfo *i2cboardinfo;
struct i3c_dev_boardinfo *i3cboardinfo;
- struct i3c_dev_desc *i3cdev;
struct i2c_dev_desc *i2cdev;
int ret;
@@ -1685,34 +1704,6 @@ static int i3c_master_bus_init(struct i3c_master_controller *master)
goto err_detach_devs;
}
}
- list_for_each_entry(i3cboardinfo, &master->boardinfo.i3c, node) {
- struct i3c_device_info info = {
- .static_addr = i3cboardinfo->static_addr,
- };
-
- if (i3cboardinfo->init_dyn_addr) {
- status = i3c_bus_get_addr_slot_status(&master->bus,
- i3cboardinfo->init_dyn_addr);
- if (status != I3C_ADDR_SLOT_FREE) {
- ret = -EBUSY;
- goto err_detach_devs;
- }
- }
-
- i3cdev = i3c_master_alloc_i3c_dev(master, &info);
- if (IS_ERR(i3cdev)) {
- ret = PTR_ERR(i3cdev);
- goto err_detach_devs;
- }
-
- i3cdev->boardinfo = i3cboardinfo;
-
- ret = i3c_master_attach_i3c_dev(master, i3cdev);
- if (ret) {
- i3c_master_free_i3c_dev(i3cdev);
- goto err_detach_devs;
- }
- }
/*
* Now execute the controller specific ->bus_init() routine, which
@@ -1749,11 +1740,43 @@ static int i3c_master_bus_init(struct i3c_master_controller *master)
goto err_bus_cleanup;
/*
- * Pre-assign dynamic address and retrieve device information if
- * needed.
+ * Reserve init_dyn_addr first, and then try to pre-assign dynamic
+ * address and retrieve device information if needed.
+ * In case pre-assign dynamic address fails, setting dynamic address to
+ * the requested init_dyn_addr is retried after DAA is done in
+ * i3c_master_add_i3c_dev_locked().
*/
- i3c_bus_for_each_i3cdev(&master->bus, i3cdev)
- i3c_master_pre_assign_dyn_addr(i3cdev);
+ list_for_each_entry(i3cboardinfo, &master->boardinfo.i3c, node) {
+
+ /*
+ * We don't reserve a dynamic address for devices that
+ * don't explicitly request one.
+ */
+ if (!i3cboardinfo->init_dyn_addr)
+ continue;
+
+ ret = i3c_bus_get_addr_slot_status(&master->bus,
+ i3cboardinfo->init_dyn_addr);
+ if (ret != I3C_ADDR_SLOT_FREE) {
+ ret = -EBUSY;
+ goto err_rstdaa;
+ }
+
+ i3c_bus_set_addr_slot_status(&master->bus,
+ i3cboardinfo->init_dyn_addr,
+ I3C_ADDR_SLOT_I3C_DEV);
+
+ /*
+ * Only try to create/attach devices that have a static
+ * address. Other devices will be created/attached when
+ * DAA happens, and the requested dynamic address will
+ * be set using SETNEWDA once those devices become
+ * addressable.
+ */
+
+ if (i3cboardinfo->static_addr)
+ i3c_master_early_i3c_dev_add(master, i3cboardinfo);
+ }
ret = i3c_master_do_daa(master);
if (ret)
@@ -1782,6 +1805,21 @@ static void i3c_master_bus_cleanup(struct i3c_master_controller *master)
i3c_master_detach_free_devs(master);
}
+static void i3c_master_attach_boardinfo(struct i3c_dev_desc *i3cdev)
+{
+ struct i3c_master_controller *master = i3cdev->common.master;
+ struct i3c_dev_boardinfo *i3cboardinfo;
+
+ list_for_each_entry(i3cboardinfo, &master->boardinfo.i3c, node) {
+ if (i3cdev->info.pid != i3cboardinfo->pid)
+ continue;
+
+ i3cdev->boardinfo = i3cboardinfo;
+ i3cdev->info.static_addr = i3cboardinfo->static_addr;
+ return;
+ }
+}
+
static struct i3c_dev_desc *
i3c_master_search_i3c_dev_duplicate(struct i3c_dev_desc *refdev)
{
@@ -1837,10 +1875,10 @@ int i3c_master_add_i3c_dev_locked(struct i3c_master_controller *master,
if (ret)
goto err_detach_dev;
+ i3c_master_attach_boardinfo(newdev);
+
olddev = i3c_master_search_i3c_dev_duplicate(newdev);
if (olddev) {
- newdev->boardinfo = olddev->boardinfo;
- newdev->info.static_addr = olddev->info.static_addr;
newdev->dev = olddev->dev;
if (newdev->dev)
newdev->dev->desc = newdev;
diff --git a/drivers/i3c/master/i3c-master-cdns.c b/drivers/i3c/master/i3c-master-cdns.c
index 3fee8bd7fe20..3f2226928fe0 100644
--- a/drivers/i3c/master/i3c-master-cdns.c
+++ b/drivers/i3c/master/i3c-master-cdns.c
@@ -1635,8 +1635,10 @@ static int cdns_i3c_master_probe(struct platform_device *pdev)
master->ibi.slots = devm_kcalloc(&pdev->dev, master->ibi.num_slots,
sizeof(*master->ibi.slots),
GFP_KERNEL);
- if (!master->ibi.slots)
+ if (!master->ibi.slots) {
+ ret = -ENOMEM;
goto err_disable_sysclk;
+ }
writel(IBIR_THR(1), master->regs + CMD_IBI_THR_CTRL);
writel(MST_INT_IBIR_THR, master->regs + MST_IER);
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 91b023341b77..32a51432ec4f 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -48,6 +48,7 @@ config INFINIBAND_ON_DEMAND_PAGING
depends on INFINIBAND_USER_MEM
select MMU_NOTIFIER
select INTERVAL_TREE
+ select HMM_MIRROR
default y
help
On demand paging support for the InfiniBand subsystem.
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 24cb71a16a28..ccf2670ef45e 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -17,7 +17,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
-ib_cm-y := cm.o
+ib_cm-y := cm.o cm_trace.o
iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 3a98439bba83..0abce004a959 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -647,13 +647,12 @@ static void process_one_req(struct work_struct *_work)
req->callback = NULL;
spin_lock_bh(&lock);
+ /*
+ * Although the work will normally have been canceled by the workqueue,
+ * it can still be requeued as long as it is on the req_list.
+ */
+ cancel_delayed_work(&req->work);
if (!list_empty(&req->list)) {
- /*
- * Although the work will normally have been canceled by the
- * workqueue, it can still be requeued as long as it is on the
- * req_list.
- */
- cancel_delayed_work(&req->work);
list_del_init(&req->list);
kfree(req);
}
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 5a76611e684a..8017c40dd110 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -133,7 +133,11 @@ static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
}
static const char * const gid_type_str[] = {
+ /* IB/RoCE v1 value is set for IB_GID_TYPE_IB and IB_GID_TYPE_ROCE for
+ * user space compatibility reasons.
+ */
[IB_GID_TYPE_IB] = "IB/RoCE v1",
+ [IB_GID_TYPE_ROCE] = "IB/RoCE v1",
[IB_GID_TYPE_ROCE_UDP_ENCAP] = "RoCE v2",
};
@@ -1220,7 +1224,7 @@ EXPORT_SYMBOL(ib_get_cached_port_state);
const struct ib_gid_attr *
rdma_get_gid_attr(struct ib_device *device, u8 port_num, int index)
{
- const struct ib_gid_attr *attr = ERR_PTR(-EINVAL);
+ const struct ib_gid_attr *attr = ERR_PTR(-ENODATA);
struct ib_gid_table *table;
unsigned long flags;
@@ -1244,6 +1248,67 @@ done:
EXPORT_SYMBOL(rdma_get_gid_attr);
/**
+ * rdma_query_gid_table - Reads GID table entries of all the ports of a device up to max_entries.
+ * @device: The device to query.
+ * @entries: Entries where GID entries are returned.
+ * @max_entries: Maximum number of entries that can be returned.
+ * Entries array must be allocated to hold max_entries number of entries.
+ * @num_entries: Updated to the number of entries that were successfully read.
+ *
+ * Returns number of entries on success or appropriate error code.
+ */
+ssize_t rdma_query_gid_table(struct ib_device *device,
+ struct ib_uverbs_gid_entry *entries,
+ size_t max_entries)
+{
+ const struct ib_gid_attr *gid_attr;
+ ssize_t num_entries = 0, ret;
+ struct ib_gid_table *table;
+ unsigned int port_num, i;
+ struct net_device *ndev;
+ unsigned long flags;
+
+ rdma_for_each_port(device, port_num) {
+ if (!rdma_ib_or_roce(device, port_num))
+ continue;
+
+ table = rdma_gid_table(device, port_num);
+ read_lock_irqsave(&table->rwlock, flags);
+ for (i = 0; i < table->sz; i++) {
+ if (!is_gid_entry_valid(table->data_vec[i]))
+ continue;
+ if (num_entries >= max_entries) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ gid_attr = &table->data_vec[i]->attr;
+
+ memcpy(&entries->gid, &gid_attr->gid,
+ sizeof(gid_attr->gid));
+ entries->gid_index = gid_attr->index;
+ entries->port_num = gid_attr->port_num;
+ entries->gid_type = gid_attr->gid_type;
+ ndev = rcu_dereference_protected(
+ gid_attr->ndev,
+ lockdep_is_held(&table->rwlock));
+ if (ndev)
+ entries->netdev_ifindex = ndev->ifindex;
+
+ num_entries++;
+ entries++;
+ }
+ read_unlock_irqrestore(&table->rwlock, flags);
+ }
+
+ return num_entries;
+err:
+ read_unlock_irqrestore(&table->rwlock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_query_gid_table);
+
+/**
* rdma_put_gid_attr - Release reference to the GID attribute
* @attr: Pointer to the GID attribute whose reference
* needs to be released.
@@ -1299,7 +1364,7 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
struct ib_gid_table_entry *entry =
container_of(attr, struct ib_gid_table_entry, attr);
struct ib_device *device = entry->attr.device;
- struct net_device *ndev = ERR_PTR(-ENODEV);
+ struct net_device *ndev = ERR_PTR(-EINVAL);
u8 port_num = entry->attr.port_num;
struct ib_gid_table *table;
unsigned long flags;
@@ -1311,8 +1376,7 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
valid = is_gid_entry_valid(table->data_vec[attr->index]);
if (valid) {
ndev = rcu_dereference(attr->ndev);
- if (!ndev ||
- (ndev && ((READ_ONCE(ndev->flags) & IFF_UP) == 0)))
+ if (!ndev)
ndev = ERR_PTR(-ENODEV);
}
read_unlock_irqrestore(&table->rwlock, flags);
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index fbc28f1a8b92..5740d1ba3568 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -27,6 +27,7 @@
#include <rdma/ib_cm.h>
#include "cm_msgs.h"
#include "core_priv.h"
+#include "cm_trace.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
@@ -201,7 +202,6 @@ static struct attribute *cm_counter_default_attrs[] = {
struct cm_port {
struct cm_device *cm_dev;
struct ib_mad_agent *mad_agent;
- struct kobject port_obj;
u8 port_num;
struct list_head cm_priv_prim_list;
struct list_head cm_priv_altr_list;
@@ -1563,6 +1563,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
+ trace_icm_send_req(&cm_id_priv->id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
ret = ib_post_send_mad(cm_id_priv->msg, NULL);
if (ret) {
@@ -1610,6 +1611,9 @@ static int cm_issue_rej(struct cm_port *port,
IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length);
}
+ trace_icm_issue_rej(
+ IBA_GET(CM_REJ_LOCAL_COMM_ID, rcv_msg),
+ IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg));
ret = ib_post_send_mad(msg, NULL);
if (ret)
cm_free_msg(msg);
@@ -1961,6 +1965,7 @@ static void cm_dup_req_handler(struct cm_work *work,
}
spin_unlock_irq(&cm_id_priv->lock);
+ trace_icm_send_dup_req(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret)
goto free;
@@ -2124,8 +2129,7 @@ static int cm_req_handler(struct cm_work *work)
listen_cm_id_priv = cm_match_req(work, cm_id_priv);
if (!listen_cm_id_priv) {
- pr_debug("%s: local_id %d, no listen_cm_id_priv\n", __func__,
- be32_to_cpu(cm_id_priv->id.local_id));
+ trace_icm_no_listener_err(&cm_id_priv->id);
cm_id_priv->id.state = IB_CM_IDLE;
ret = -EINVAL;
goto destroy;
@@ -2274,8 +2278,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REQ_RCVD &&
cm_id->state != IB_CM_MRA_REQ_SENT) {
- pr_debug("%s: local_comm_id %d, cm_id->state: %d\n", __func__,
- be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
+ trace_icm_send_rep_err(cm_id_priv->id.local_id, cm_id->state);
ret = -EINVAL;
goto out;
}
@@ -2289,6 +2292,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
+ trace_icm_send_rep(cm_id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -2348,8 +2352,7 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REP_RCVD &&
cm_id->state != IB_CM_MRA_REP_SENT) {
- pr_debug("%s: local_id %d, cm_id->state %d\n", __func__,
- be32_to_cpu(cm_id->local_id), cm_id->state);
+ trace_icm_send_cm_rtu_err(cm_id);
ret = -EINVAL;
goto error;
}
@@ -2361,6 +2364,7 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
+ trace_icm_send_rtu(cm_id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -2442,6 +2446,7 @@ static void cm_dup_rep_handler(struct cm_work *work)
goto unlock;
spin_unlock_irq(&cm_id_priv->lock);
+ trace_icm_send_dup_rep(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret)
goto free;
@@ -2465,7 +2470,7 @@ static int cm_rep_handler(struct cm_work *work)
cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)), 0);
if (!cm_id_priv) {
cm_dup_rep_handler(work);
- pr_debug("%s: remote_comm_id %d, no cm_id_priv\n", __func__,
+ trace_icm_remote_no_priv_err(
IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
return -EINVAL;
}
@@ -2479,11 +2484,10 @@ static int cm_rep_handler(struct cm_work *work)
break;
default:
ret = -EINVAL;
- pr_debug(
- "%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n",
- __func__, cm_id_priv->id.state,
+ trace_icm_rep_unknown_err(
IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
- IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
+ IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg),
+ cm_id_priv->id.state);
spin_unlock_irq(&cm_id_priv->lock);
goto error;
}
@@ -2500,7 +2504,7 @@ static int cm_rep_handler(struct cm_work *work)
spin_unlock(&cm.lock);
spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
- pr_debug("%s: Failed to insert remote id %d\n", __func__,
+ trace_icm_insert_failed_err(
IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
goto error;
}
@@ -2517,9 +2521,8 @@ static int cm_rep_handler(struct cm_work *work)
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
NULL, 0);
ret = -EINVAL;
- pr_debug(
- "%s: Stale connection. local_comm_id %d, remote_comm_id %d\n",
- __func__, IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
+ trace_icm_staleconn_err(
+ IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
if (cur_cm_id_priv) {
@@ -2646,9 +2649,7 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
return -EINVAL;
if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
- pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
- be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_dreq_skipped(&cm_id_priv->id);
return -EINVAL;
}
@@ -2667,6 +2668,7 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
+ trace_icm_send_dreq(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
cm_enter_timewait(cm_id_priv);
@@ -2722,10 +2724,7 @@ static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
return -EINVAL;
if (cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
- pr_debug(
- "%s: local_id %d, cm_idcm_id->state(%d) != IB_CM_DREQ_RCVD\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_send_drep_err(&cm_id_priv->id);
kfree(private_data);
return -EINVAL;
}
@@ -2740,6 +2739,7 @@ static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
+ trace_icm_send_drep(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
cm_free_msg(msg);
@@ -2789,6 +2789,9 @@ static int cm_issue_drep(struct cm_port *port,
IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg,
IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
+ trace_icm_issue_drep(
+ IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
+ IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
ret = ib_post_send_mad(msg, NULL);
if (ret)
cm_free_msg(msg);
@@ -2810,9 +2813,8 @@ static int cm_dreq_handler(struct cm_work *work)
atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
counter[CM_DREQ_COUNTER]);
cm_issue_drep(work->port, work->mad_recv_wc);
- pr_debug(
- "%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n",
- __func__, IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
+ trace_icm_no_priv_err(
+ IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
return -EINVAL;
}
@@ -2858,9 +2860,7 @@ static int cm_dreq_handler(struct cm_work *work)
counter[CM_DREQ_COUNTER]);
goto unlock;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_dreq_unknown_err(&cm_id_priv->id);
goto unlock;
}
cm_id_priv->id.state = IB_CM_DREQ_RCVD;
@@ -2945,12 +2945,11 @@ static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
state);
break;
default:
- pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
- be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_send_unknown_rej_err(&cm_id_priv->id);
return -EINVAL;
}
+ trace_icm_send_rej(&cm_id_priv->id, reason);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
cm_free_msg(msg);
@@ -3060,9 +3059,7 @@ static int cm_rej_handler(struct cm_work *work)
}
fallthrough;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_rej_unknown_err(&cm_id_priv->id);
spin_unlock_irq(&cm_id_priv->lock);
goto out;
}
@@ -3118,9 +3115,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
}
fallthrough;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_send_mra_unknown_err(&cm_id_priv->id);
ret = -EINVAL;
goto error1;
}
@@ -3133,6 +3128,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
msg_response, service_timeout,
private_data, private_data_len);
+ trace_icm_send_mra(cm_id);
ret = ib_post_send_mad(msg, NULL);
if (ret)
goto error2;
@@ -3229,9 +3225,7 @@ static int cm_mra_handler(struct cm_work *work)
counter[CM_MRA_COUNTER]);
fallthrough;
default:
- pr_debug("%s local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_mra_unknown_err(&cm_id_priv->id);
goto out;
}
@@ -3505,10 +3499,12 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state == IB_CM_IDLE)
+ if (cm_id->state == IB_CM_IDLE) {
+ trace_icm_send_sidr_req(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
- else
+ } else {
ret = -EINVAL;
+ }
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -3670,6 +3666,7 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
param);
+ trace_icm_send_sidr_rep(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
cm_free_msg(msg);
@@ -3767,8 +3764,7 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg,
if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
goto discard;
- pr_debug_ratelimited("CM: failed sending MAD in state %d. (%s)\n",
- state, ib_wc_status_msg(wc_status));
+ trace_icm_mad_send_err(state, wc_status);
switch (state) {
case IB_CM_REQ_SENT:
case IB_CM_MRA_REQ_RCVD:
@@ -3891,7 +3887,7 @@ static void cm_work_handler(struct work_struct *_work)
ret = cm_timewait_handler(work);
break;
default:
- pr_debug("cm_event.event: 0x%x\n", work->cm_event.event);
+ trace_icm_handler_err(work->cm_event.event);
ret = -EINVAL;
break;
}
@@ -3927,8 +3923,7 @@ static int cm_establish(struct ib_cm_id *cm_id)
ret = -EISCONN;
break;
default:
- pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
- be32_to_cpu(cm_id->local_id), cm_id->state);
+ trace_icm_establish_err(cm_id);
ret = -EINVAL;
break;
}
@@ -4125,9 +4120,7 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_qp_init_err(&cm_id_priv->id);
ret = -EINVAL;
break;
}
@@ -4175,9 +4168,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_qp_rtr_err(&cm_id_priv->id);
ret = -EINVAL;
break;
}
@@ -4237,9 +4228,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_qp_rts_err(&cm_id_priv->id);
ret = -EINVAL;
break;
}
@@ -4295,20 +4284,6 @@ static struct kobj_type cm_counter_obj_type = {
.default_attrs = cm_counter_default_attrs
};
-static char *cm_devnode(struct device *dev, umode_t *mode)
-{
- if (mode)
- *mode = 0666;
- return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
-}
-
-struct class cm_class = {
- .owner = THIS_MODULE,
- .name = "infiniband_cm",
- .devnode = cm_devnode,
-};
-EXPORT_SYMBOL(cm_class);
-
static int cm_create_port_fs(struct cm_port *port)
{
int i, ret;
@@ -4511,12 +4486,6 @@ static int __init ib_cm_init(void)
get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
INIT_LIST_HEAD(&cm.timewait_list);
- ret = class_register(&cm_class);
- if (ret) {
- ret = -ENOMEM;
- goto error1;
- }
-
cm.wq = alloc_workqueue("ib_cm", 0, 1);
if (!cm.wq) {
ret = -ENOMEM;
@@ -4531,8 +4500,6 @@ static int __init ib_cm_init(void)
error3:
destroy_workqueue(cm.wq);
error2:
- class_unregister(&cm_class);
-error1:
return ret;
}
@@ -4553,7 +4520,6 @@ static void __exit ib_cm_cleanup(void)
kfree(timewait_info);
}
- class_unregister(&cm_class);
WARN_ON(!xa_empty(&cm.local_id_table));
}
diff --git a/drivers/infiniband/core/cm_trace.c b/drivers/infiniband/core/cm_trace.c
new file mode 100644
index 000000000000..8f3482f66338
--- /dev/null
+++ b/drivers/infiniband/core/cm_trace.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Trace points for the IB Connection Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2020, Oracle and/or its affiliates.
+ */
+
+#include <rdma/rdma_cm.h>
+#include "cma_priv.h"
+
+#define CREATE_TRACE_POINTS
+
+#include "cm_trace.h"
diff --git a/drivers/infiniband/core/cm_trace.h b/drivers/infiniband/core/cm_trace.h
new file mode 100644
index 000000000000..e9d282679ef1
--- /dev/null
+++ b/drivers/infiniband/core/cm_trace.h
@@ -0,0 +1,414 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Trace point definitions for the RDMA Connect Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2020 Oracle and/or its affiliates.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ib_cma
+
+#if !defined(_TRACE_IB_CMA_H) || defined(TRACE_HEADER_MULTI_READ)
+
+#define _TRACE_IB_CMA_H
+
+#include <linux/tracepoint.h>
+#include <rdma/ib_cm.h>
+#include <trace/events/rdma.h>
+
+/*
+ * enum ib_cm_state, from include/rdma/ib_cm.h
+ */
+#define IB_CM_STATE_LIST \
+ ib_cm_state(IDLE) \
+ ib_cm_state(LISTEN) \
+ ib_cm_state(REQ_SENT) \
+ ib_cm_state(REQ_RCVD) \
+ ib_cm_state(MRA_REQ_SENT) \
+ ib_cm_state(MRA_REQ_RCVD) \
+ ib_cm_state(REP_SENT) \
+ ib_cm_state(REP_RCVD) \
+ ib_cm_state(MRA_REP_SENT) \
+ ib_cm_state(MRA_REP_RCVD) \
+ ib_cm_state(ESTABLISHED) \
+ ib_cm_state(DREQ_SENT) \
+ ib_cm_state(DREQ_RCVD) \
+ ib_cm_state(TIMEWAIT) \
+ ib_cm_state(SIDR_REQ_SENT) \
+ ib_cm_state_end(SIDR_REQ_RCVD)
+
+#undef ib_cm_state
+#undef ib_cm_state_end
+#define ib_cm_state(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_state_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_STATE_LIST
+
+#undef ib_cm_state
+#undef ib_cm_state_end
+#define ib_cm_state(x) { IB_CM_##x, #x },
+#define ib_cm_state_end(x) { IB_CM_##x, #x }
+
+#define show_ib_cm_state(x) \
+ __print_symbolic(x, IB_CM_STATE_LIST)
+
+/*
+ * enum ib_cm_lap_state, from include/rdma/ib_cm.h
+ */
+#define IB_CM_LAP_STATE_LIST \
+ ib_cm_lap_state(LAP_UNINIT) \
+ ib_cm_lap_state(LAP_IDLE) \
+ ib_cm_lap_state(LAP_SENT) \
+ ib_cm_lap_state(LAP_RCVD) \
+ ib_cm_lap_state(MRA_LAP_SENT) \
+ ib_cm_lap_state_end(MRA_LAP_RCVD)
+
+#undef ib_cm_lap_state
+#undef ib_cm_lap_state_end
+#define ib_cm_lap_state(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_lap_state_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_LAP_STATE_LIST
+
+#undef ib_cm_lap_state
+#undef ib_cm_lap_state_end
+#define ib_cm_lap_state(x) { IB_CM_##x, #x },
+#define ib_cm_lap_state_end(x) { IB_CM_##x, #x }
+
+#define show_ib_cm_lap_state(x) \
+ __print_symbolic(x, IB_CM_LAP_STATE_LIST)
+
+/*
+ * enum ib_cm_rej_reason, from include/rdma/ib_cm.h
+ */
+#define IB_CM_REJ_REASON_LIST \
+ ib_cm_rej_reason(REJ_NO_QP) \
+ ib_cm_rej_reason(REJ_NO_EEC) \
+ ib_cm_rej_reason(REJ_NO_RESOURCES) \
+ ib_cm_rej_reason(REJ_TIMEOUT) \
+ ib_cm_rej_reason(REJ_UNSUPPORTED) \
+ ib_cm_rej_reason(REJ_INVALID_COMM_ID) \
+ ib_cm_rej_reason(REJ_INVALID_COMM_INSTANCE) \
+ ib_cm_rej_reason(REJ_INVALID_SERVICE_ID) \
+ ib_cm_rej_reason(REJ_INVALID_TRANSPORT_TYPE) \
+ ib_cm_rej_reason(REJ_STALE_CONN) \
+ ib_cm_rej_reason(REJ_RDC_NOT_EXIST) \
+ ib_cm_rej_reason(REJ_INVALID_GID) \
+ ib_cm_rej_reason(REJ_INVALID_LID) \
+ ib_cm_rej_reason(REJ_INVALID_SL) \
+ ib_cm_rej_reason(REJ_INVALID_TRAFFIC_CLASS) \
+ ib_cm_rej_reason(REJ_INVALID_HOP_LIMIT) \
+ ib_cm_rej_reason(REJ_INVALID_PACKET_RATE) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_GID) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_LID) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_SL) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_TRAFFIC_CLASS) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_HOP_LIMIT) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_PACKET_RATE) \
+ ib_cm_rej_reason(REJ_PORT_CM_REDIRECT) \
+ ib_cm_rej_reason(REJ_PORT_REDIRECT) \
+ ib_cm_rej_reason(REJ_INVALID_MTU) \
+ ib_cm_rej_reason(REJ_INSUFFICIENT_RESP_RESOURCES) \
+ ib_cm_rej_reason(REJ_CONSUMER_DEFINED) \
+ ib_cm_rej_reason(REJ_INVALID_RNR_RETRY) \
+ ib_cm_rej_reason(REJ_DUPLICATE_LOCAL_COMM_ID) \
+ ib_cm_rej_reason(REJ_INVALID_CLASS_VERSION) \
+ ib_cm_rej_reason(REJ_INVALID_FLOW_LABEL) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_FLOW_LABEL) \
+ ib_cm_rej_reason_end(REJ_VENDOR_OPTION_NOT_SUPPORTED)
+
+#undef ib_cm_rej_reason
+#undef ib_cm_rej_reason_end
+#define ib_cm_rej_reason(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_rej_reason_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_REJ_REASON_LIST
+
+#undef ib_cm_rej_reason
+#undef ib_cm_rej_reason_end
+#define ib_cm_rej_reason(x) { IB_CM_##x, #x },
+#define ib_cm_rej_reason_end(x) { IB_CM_##x, #x }
+
+#define show_ib_cm_rej_reason(x) \
+ __print_symbolic(x, IB_CM_REJ_REASON_LIST)
+
+DECLARE_EVENT_CLASS(icm_id_class,
+ TP_PROTO(
+ const struct ib_cm_id *cm_id
+ ),
+
+ TP_ARGS(cm_id),
+
+ TP_STRUCT__entry(
+ __field(const void *, cm_id) /* for eBPF scripts */
+ __field(unsigned int, local_id)
+ __field(unsigned int, remote_id)
+ __field(unsigned long, state)
+ __field(unsigned long, lap_state)
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = cm_id;
+ __entry->local_id = be32_to_cpu(cm_id->local_id);
+ __entry->remote_id = be32_to_cpu(cm_id->remote_id);
+ __entry->state = cm_id->state;
+ __entry->lap_state = cm_id->lap_state;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u state=%s lap_state=%s",
+ __entry->local_id, __entry->remote_id,
+ show_ib_cm_state(__entry->state),
+ show_ib_cm_lap_state(__entry->lap_state)
+ )
+);
+
+#define DEFINE_CM_SEND_EVENT(name) \
+ DEFINE_EVENT(icm_id_class, \
+ icm_send_##name, \
+ TP_PROTO( \
+ const struct ib_cm_id *cm_id \
+ ), \
+ TP_ARGS(cm_id))
+
+DEFINE_CM_SEND_EVENT(req);
+DEFINE_CM_SEND_EVENT(rep);
+DEFINE_CM_SEND_EVENT(dup_req);
+DEFINE_CM_SEND_EVENT(dup_rep);
+DEFINE_CM_SEND_EVENT(rtu);
+DEFINE_CM_SEND_EVENT(mra);
+DEFINE_CM_SEND_EVENT(sidr_req);
+DEFINE_CM_SEND_EVENT(sidr_rep);
+DEFINE_CM_SEND_EVENT(dreq);
+DEFINE_CM_SEND_EVENT(drep);
+
+TRACE_EVENT(icm_send_rej,
+ TP_PROTO(
+ const struct ib_cm_id *cm_id,
+ enum ib_cm_rej_reason reason
+ ),
+
+ TP_ARGS(cm_id, reason),
+
+ TP_STRUCT__entry(
+ __field(const void *, cm_id)
+ __field(u32, local_id)
+ __field(u32, remote_id)
+ __field(unsigned long, state)
+ __field(unsigned long, reason)
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = cm_id;
+ __entry->local_id = be32_to_cpu(cm_id->local_id);
+ __entry->remote_id = be32_to_cpu(cm_id->remote_id);
+ __entry->state = cm_id->state;
+ __entry->reason = reason;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u state=%s reason=%s",
+ __entry->local_id, __entry->remote_id,
+ show_ib_cm_state(__entry->state),
+ show_ib_cm_rej_reason(__entry->reason)
+ )
+);
+
+#define DEFINE_CM_ERR_EVENT(name) \
+ DEFINE_EVENT(icm_id_class, \
+ icm_##name##_err, \
+ TP_PROTO( \
+ const struct ib_cm_id *cm_id \
+ ), \
+ TP_ARGS(cm_id))
+
+DEFINE_CM_ERR_EVENT(send_cm_rtu);
+DEFINE_CM_ERR_EVENT(establish);
+DEFINE_CM_ERR_EVENT(no_listener);
+DEFINE_CM_ERR_EVENT(send_drep);
+DEFINE_CM_ERR_EVENT(dreq_unknown);
+DEFINE_CM_ERR_EVENT(send_unknown_rej);
+DEFINE_CM_ERR_EVENT(rej_unknown);
+DEFINE_CM_ERR_EVENT(send_mra_unknown);
+DEFINE_CM_ERR_EVENT(mra_unknown);
+DEFINE_CM_ERR_EVENT(qp_init);
+DEFINE_CM_ERR_EVENT(qp_rtr);
+DEFINE_CM_ERR_EVENT(qp_rts);
+
+DEFINE_EVENT(icm_id_class, \
+ icm_dreq_skipped, \
+ TP_PROTO( \
+ const struct ib_cm_id *cm_id \
+ ), \
+ TP_ARGS(cm_id) \
+);
+
+DECLARE_EVENT_CLASS(icm_local_class,
+ TP_PROTO(
+ unsigned int local_id,
+ unsigned int remote_id
+ ),
+
+ TP_ARGS(local_id, remote_id),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, local_id)
+ __field(unsigned int, remote_id)
+ ),
+
+ TP_fast_assign(
+ __entry->local_id = local_id;
+ __entry->remote_id = remote_id;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u",
+ __entry->local_id, __entry->remote_id
+ )
+);
+
+#define DEFINE_CM_LOCAL_EVENT(name) \
+ DEFINE_EVENT(icm_local_class, \
+ icm_##name, \
+ TP_PROTO( \
+ unsigned int local_id, \
+ unsigned int remote_id \
+ ), \
+ TP_ARGS(local_id, remote_id))
+
+DEFINE_CM_LOCAL_EVENT(issue_rej);
+DEFINE_CM_LOCAL_EVENT(issue_drep);
+DEFINE_CM_LOCAL_EVENT(staleconn_err);
+DEFINE_CM_LOCAL_EVENT(no_priv_err);
+
+DECLARE_EVENT_CLASS(icm_remote_class,
+ TP_PROTO(
+ u32 remote_id
+ ),
+
+ TP_ARGS(remote_id),
+
+ TP_STRUCT__entry(
+ __field(u32, remote_id)
+ ),
+
+ TP_fast_assign(
+ __entry->remote_id = remote_id;
+ ),
+
+ TP_printk("remote_id=%u",
+ __entry->remote_id
+ )
+);
+
+#define DEFINE_CM_REMOTE_EVENT(name) \
+ DEFINE_EVENT(icm_remote_class, \
+ icm_##name, \
+ TP_PROTO( \
+ u32 remote_id \
+ ), \
+ TP_ARGS(remote_id))
+
+DEFINE_CM_REMOTE_EVENT(remote_no_priv_err);
+DEFINE_CM_REMOTE_EVENT(insert_failed_err);
+
+TRACE_EVENT(icm_send_rep_err,
+ TP_PROTO(
+ __be32 local_id,
+ enum ib_cm_state state
+ ),
+
+ TP_ARGS(local_id, state),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, local_id)
+ __field(unsigned long, state)
+ ),
+
+ TP_fast_assign(
+ __entry->local_id = be32_to_cpu(local_id);
+ __entry->state = state;
+ ),
+
+ TP_printk("local_id=%u state=%s",
+ __entry->local_id, show_ib_cm_state(__entry->state)
+ )
+);
+
+TRACE_EVENT(icm_rep_unknown_err,
+ TP_PROTO(
+ unsigned int local_id,
+ unsigned int remote_id,
+ enum ib_cm_state state
+ ),
+
+ TP_ARGS(local_id, remote_id, state),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, local_id)
+ __field(unsigned int, remote_id)
+ __field(unsigned long, state)
+ ),
+
+ TP_fast_assign(
+ __entry->local_id = local_id;
+ __entry->remote_id = remote_id;
+ __entry->state = state;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u state=%s",
+ __entry->local_id, __entry->remote_id,
+ show_ib_cm_state(__entry->state)
+ )
+);
+
+TRACE_EVENT(icm_handler_err,
+ TP_PROTO(
+ enum ib_cm_event_type event
+ ),
+
+ TP_ARGS(event),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, event)
+ ),
+
+ TP_fast_assign(
+ __entry->event = event;
+ ),
+
+ TP_printk("unhandled event=%s",
+ rdma_show_ib_cm_event(__entry->event)
+ )
+);
+
+TRACE_EVENT(icm_mad_send_err,
+ TP_PROTO(
+ enum ib_cm_state state,
+ enum ib_wc_status wc_status
+ ),
+
+ TP_ARGS(state, wc_status),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, state)
+ __field(unsigned long, wc_status)
+ ),
+
+ TP_fast_assign(
+ __entry->state = state;
+ __entry->wc_status = wc_status;
+ ),
+
+ TP_printk("state=%s completion status=%s",
+ show_ib_cm_state(__entry->state),
+ rdma_show_wc_status(__entry->wc_status)
+ )
+);
+
+#endif /* _TRACE_IB_CMA_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/infiniband/core
+#define TRACE_INCLUDE_FILE cm_trace
+
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 5888311b2119..7c2ab1f2fbea 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -68,6 +68,9 @@ static const char * const cma_events[] = {
[RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit",
};
+static void cma_set_mgid(struct rdma_id_private *id_priv, struct sockaddr *addr,
+ union ib_gid *mgid);
+
const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
{
size_t index = event;
@@ -301,6 +304,10 @@ int cma_set_default_gid_type(struct cma_device *cma_dev,
if (!rdma_is_port_valid(cma_dev->device, port))
return -EINVAL;
+ if (default_gid_type == IB_GID_TYPE_IB &&
+ rdma_protocol_roce_eth_encap(cma_dev->device, port))
+ default_gid_type = IB_GID_TYPE_ROCE;
+
supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
if (!(supported_gids & 1 << default_gid_type))
@@ -345,13 +352,10 @@ struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
struct cma_multicast {
struct rdma_id_private *id_priv;
- union {
- struct ib_sa_multicast *ib;
- } multicast;
+ struct ib_sa_multicast *sa_mc;
struct list_head list;
void *context;
struct sockaddr_storage addr;
- struct kref mcref;
u8 join_state;
};
@@ -363,18 +367,6 @@ struct cma_work {
struct rdma_cm_event event;
};
-struct cma_ndev_work {
- struct work_struct work;
- struct rdma_id_private *id;
- struct rdma_cm_event event;
-};
-
-struct iboe_mcast_work {
- struct work_struct work;
- struct rdma_id_private *id;
- struct cma_multicast *mc;
-};
-
union cma_ip_addr {
struct in6_addr ip6;
struct {
@@ -404,23 +396,21 @@ struct cma_req_info {
u16 pkey;
};
-static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
-{
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&id_priv->lock, flags);
- ret = (id_priv->state == comp);
- spin_unlock_irqrestore(&id_priv->lock, flags);
- return ret;
-}
-
static int cma_comp_exch(struct rdma_id_private *id_priv,
enum rdma_cm_state comp, enum rdma_cm_state exch)
{
unsigned long flags;
int ret;
+ /*
+ * The FSM uses a funny double locking where state is protected by both
+ * the handler_mutex and the spinlock. State is not allowed to change
+ * away from a handler_mutex protected value without also holding
+ * handler_mutex.
+ */
+ if (comp == RDMA_CM_CONNECT)
+ lockdep_assert_held(&id_priv->handler_mutex);
+
spin_lock_irqsave(&id_priv->lock, flags);
if ((ret = (id_priv->state == comp)))
id_priv->state = exch;
@@ -467,10 +457,8 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
id_priv->id.route.addr.dev_addr.transport =
rdma_node_get_transport(cma_dev->device->node_type);
list_add_tail(&id_priv->list, &cma_dev->id_list);
- if (id_priv->res.kern_name)
- rdma_restrack_kadd(&id_priv->res);
- else
- rdma_restrack_uadd(&id_priv->res);
+ rdma_restrack_add(&id_priv->res);
+
trace_cm_id_attach(id_priv, cma_dev->device);
}
@@ -483,14 +471,6 @@ static void cma_attach_to_dev(struct rdma_id_private *id_priv,
rdma_start_port(cma_dev->device)];
}
-static inline void release_mc(struct kref *kref)
-{
- struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);
-
- kfree(mc->multicast.ib);
- kfree(mc);
-}
-
static void cma_release_dev(struct rdma_id_private *id_priv)
{
mutex_lock(&lock);
@@ -844,10 +824,10 @@ static void cma_id_put(struct rdma_id_private *id_priv)
complete(&id_priv->comp);
}
-struct rdma_cm_id *__rdma_create_id(struct net *net,
- rdma_cm_event_handler event_handler,
- void *context, enum rdma_ucm_port_space ps,
- enum ib_qp_type qp_type, const char *caller)
+static struct rdma_id_private *
+__rdma_create_id(struct net *net, rdma_cm_event_handler event_handler,
+ void *context, enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type, const struct rdma_id_private *parent)
{
struct rdma_id_private *id_priv;
@@ -855,8 +835,6 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
if (!id_priv)
return ERR_PTR(-ENOMEM);
- rdma_restrack_set_task(&id_priv->res, caller);
- id_priv->res.type = RDMA_RESTRACK_CM_ID;
id_priv->state = RDMA_CM_IDLE;
id_priv->id.context = context;
id_priv->id.event_handler = event_handler;
@@ -876,9 +854,45 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
id_priv->id.route.addr.dev_addr.net = get_net(net);
id_priv->seq_num &= 0x00ffffff;
- return &id_priv->id;
+ rdma_restrack_new(&id_priv->res, RDMA_RESTRACK_CM_ID);
+ if (parent)
+ rdma_restrack_parent_name(&id_priv->res, &parent->res);
+
+ return id_priv;
+}
+
+struct rdma_cm_id *
+__rdma_create_kernel_id(struct net *net, rdma_cm_event_handler event_handler,
+ void *context, enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type, const char *caller)
+{
+ struct rdma_id_private *ret;
+
+ ret = __rdma_create_id(net, event_handler, context, ps, qp_type, NULL);
+ if (IS_ERR(ret))
+ return ERR_CAST(ret);
+
+ rdma_restrack_set_name(&ret->res, caller);
+ return &ret->id;
}
-EXPORT_SYMBOL(__rdma_create_id);
+EXPORT_SYMBOL(__rdma_create_kernel_id);
+
+struct rdma_cm_id *rdma_create_user_id(rdma_cm_event_handler event_handler,
+ void *context,
+ enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type)
+{
+ struct rdma_id_private *ret;
+
+ ret = __rdma_create_id(current->nsproxy->net_ns, event_handler, context,
+ ps, qp_type, NULL);
+ if (IS_ERR(ret))
+ return ERR_CAST(ret);
+
+ rdma_restrack_set_name(&ret->res, NULL);
+ return &ret->id;
+}
+EXPORT_SYMBOL(rdma_create_user_id);
static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
@@ -1783,19 +1797,30 @@ static void cma_release_port(struct rdma_id_private *id_priv)
mutex_unlock(&lock);
}
-static void cma_leave_roce_mc_group(struct rdma_id_private *id_priv,
- struct cma_multicast *mc)
+static void destroy_mc(struct rdma_id_private *id_priv,
+ struct cma_multicast *mc)
{
- struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
- struct net_device *ndev = NULL;
+ if (rdma_cap_ib_mcast(id_priv->id.device, id_priv->id.port_num))
+ ib_sa_free_multicast(mc->sa_mc);
- if (dev_addr->bound_dev_if)
- ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
- if (ndev) {
- cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false);
- dev_put(ndev);
+ if (rdma_protocol_roce(id_priv->id.device, id_priv->id.port_num)) {
+ struct rdma_dev_addr *dev_addr =
+ &id_priv->id.route.addr.dev_addr;
+ struct net_device *ndev = NULL;
+
+ if (dev_addr->bound_dev_if)
+ ndev = dev_get_by_index(dev_addr->net,
+ dev_addr->bound_dev_if);
+ if (ndev) {
+ union ib_gid mgid;
+
+ cma_set_mgid(id_priv, (struct sockaddr *)&mc->addr,
+ &mgid);
+ cma_igmp_send(ndev, &mgid, false);
+ dev_put(ndev);
+ }
}
- kref_put(&mc->mcref, release_mc);
+ kfree(mc);
}
static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
@@ -1803,16 +1828,10 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
struct cma_multicast *mc;
while (!list_empty(&id_priv->mc_list)) {
- mc = container_of(id_priv->mc_list.next,
- struct cma_multicast, list);
+ mc = list_first_entry(&id_priv->mc_list, struct cma_multicast,
+ list);
list_del(&mc->list);
- if (rdma_cap_ib_mcast(id_priv->cma_dev->device,
- id_priv->id.port_num)) {
- ib_sa_free_multicast(mc->multicast.ib);
- kfree(mc);
- } else {
- cma_leave_roce_mc_group(id_priv, mc);
- }
+ destroy_mc(id_priv, mc);
}
}
@@ -1821,7 +1840,6 @@ static void _destroy_id(struct rdma_id_private *id_priv,
{
cma_cancel_operation(id_priv, state);
- rdma_restrack_del(&id_priv->res);
if (id_priv->cma_dev) {
if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
if (id_priv->cm_id.ib)
@@ -1847,6 +1865,7 @@ static void _destroy_id(struct rdma_id_private *id_priv,
rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
put_net(id_priv->id.route.addr.dev_addr.net);
+ rdma_restrack_del(&id_priv->res);
kfree(id_priv);
}
@@ -1949,13 +1968,15 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
{
struct rdma_id_private *id_priv = cm_id->context;
struct rdma_cm_event event = {};
+ enum rdma_cm_state state;
int ret;
mutex_lock(&id_priv->handler_mutex);
+ state = READ_ONCE(id_priv->state);
if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
- id_priv->state != RDMA_CM_CONNECT) ||
+ state != RDMA_CM_CONNECT) ||
(ib_event->event == IB_CM_TIMEWAIT_EXIT &&
- id_priv->state != RDMA_CM_DISCONNECT))
+ state != RDMA_CM_DISCONNECT))
goto out;
switch (ib_event->event) {
@@ -1965,7 +1986,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
event.status = -ETIMEDOUT;
break;
case IB_CM_REP_RECEIVED:
- if (cma_comp(id_priv, RDMA_CM_CONNECT) &&
+ if (state == RDMA_CM_CONNECT &&
(id_priv->id.qp_type != IB_QPT_UD)) {
trace_cm_send_mra(id_priv);
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
@@ -2043,14 +2064,15 @@ cma_ib_new_conn_id(const struct rdma_cm_id *listen_id,
int ret;
listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
- id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
- listen_id->event_handler, listen_id->context,
- listen_id->ps, ib_event->param.req_rcvd.qp_type,
- listen_id_priv->res.kern_name);
- if (IS_ERR(id))
+ id_priv = __rdma_create_id(listen_id->route.addr.dev_addr.net,
+ listen_id->event_handler, listen_id->context,
+ listen_id->ps,
+ ib_event->param.req_rcvd.qp_type,
+ listen_id_priv);
+ if (IS_ERR(id_priv))
return NULL;
- id_priv = container_of(id, struct rdma_id_private, id);
+ id = &id_priv->id;
if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
(struct sockaddr *)&id->route.addr.dst_addr,
listen_id, ib_event, ss_family, service_id))
@@ -2104,13 +2126,13 @@ cma_ib_new_udp_id(const struct rdma_cm_id *listen_id,
int ret;
listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
- id = __rdma_create_id(net, listen_id->event_handler, listen_id->context,
- listen_id->ps, IB_QPT_UD,
- listen_id_priv->res.kern_name);
- if (IS_ERR(id))
+ id_priv = __rdma_create_id(net, listen_id->event_handler,
+ listen_id->context, listen_id->ps, IB_QPT_UD,
+ listen_id_priv);
+ if (IS_ERR(id_priv))
return NULL;
- id_priv = container_of(id, struct rdma_id_private, id);
+ id = &id_priv->id;
if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
(struct sockaddr *)&id->route.addr.dst_addr,
listen_id, ib_event, ss_family,
@@ -2184,7 +2206,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
}
mutex_lock(&listen_id->handler_mutex);
- if (listen_id->state != RDMA_CM_LISTEN) {
+ if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN) {
ret = -ECONNABORTED;
goto err_unlock;
}
@@ -2226,8 +2248,8 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
goto net_dev_put;
}
- if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
- (conn_id->id.qp_type != IB_QPT_UD)) {
+ if (READ_ONCE(conn_id->state) == RDMA_CM_CONNECT &&
+ conn_id->id.qp_type != IB_QPT_UD) {
trace_cm_send_mra(cm_id->context);
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
}
@@ -2288,7 +2310,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state != RDMA_CM_CONNECT)
+ if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
goto out;
switch (iw_event->event) {
@@ -2346,7 +2368,6 @@ out:
static int iw_conn_req_handler(struct iw_cm_id *cm_id,
struct iw_cm_event *iw_event)
{
- struct rdma_cm_id *new_cm_id;
struct rdma_id_private *listen_id, *conn_id;
struct rdma_cm_event event = {};
int ret = -ECONNABORTED;
@@ -2362,20 +2383,18 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
listen_id = cm_id->context;
mutex_lock(&listen_id->handler_mutex);
- if (listen_id->state != RDMA_CM_LISTEN)
+ if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN)
goto out;
/* Create a new RDMA id for the new IW CM ID */
- new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
- listen_id->id.event_handler,
- listen_id->id.context,
- RDMA_PS_TCP, IB_QPT_RC,
- listen_id->res.kern_name);
- if (IS_ERR(new_cm_id)) {
+ conn_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
+ listen_id->id.event_handler,
+ listen_id->id.context, RDMA_PS_TCP,
+ IB_QPT_RC, listen_id);
+ if (IS_ERR(conn_id)) {
ret = -ENOMEM;
goto out;
}
- conn_id = container_of(new_cm_id, struct rdma_id_private, id);
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
conn_id->state = RDMA_CM_CONNECT;
@@ -2480,7 +2499,6 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
struct cma_device *cma_dev)
{
struct rdma_id_private *dev_id_priv;
- struct rdma_cm_id *id;
struct net *net = id_priv->id.route.addr.dev_addr.net;
int ret;
@@ -2489,13 +2507,12 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
return;
- id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
- id_priv->id.qp_type, id_priv->res.kern_name);
- if (IS_ERR(id))
+ dev_id_priv =
+ __rdma_create_id(net, cma_listen_handler, id_priv,
+ id_priv->id.ps, id_priv->id.qp_type, id_priv);
+ if (IS_ERR(dev_id_priv))
return;
- dev_id_priv = container_of(id, struct rdma_id_private, id);
-
dev_id_priv->state = RDMA_CM_ADDR_BOUND;
memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
rdma_addr_size(cma_src_addr(id_priv)));
@@ -2508,7 +2525,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
dev_id_priv->tos_set = id_priv->tos_set;
dev_id_priv->tos = id_priv->tos;
- ret = rdma_listen(id, id_priv->backlog);
+ ret = rdma_listen(&dev_id_priv->id, id_priv->backlog);
if (ret)
dev_warn(&cma_dev->device->dev,
"RDMA CMA: cma_listen_on_dev, error %d\n", ret);
@@ -2647,32 +2664,14 @@ static void cma_work_handler(struct work_struct *_work)
struct rdma_id_private *id_priv = work->id;
mutex_lock(&id_priv->handler_mutex);
- if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
+ READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
goto out_unlock;
-
- if (cma_cm_event_handler(id_priv, &work->event)) {
- cma_id_put(id_priv);
- destroy_id_handler_unlock(id_priv);
- goto out_free;
+ if (work->old_state != 0 || work->new_state != 0) {
+ if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
+ goto out_unlock;
}
-out_unlock:
- mutex_unlock(&id_priv->handler_mutex);
- cma_id_put(id_priv);
-out_free:
- kfree(work);
-}
-
-static void cma_ndev_work_handler(struct work_struct *_work)
-{
- struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
- struct rdma_id_private *id_priv = work->id;
-
- mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state == RDMA_CM_DESTROYING ||
- id_priv->state == RDMA_CM_DEVICE_REMOVAL)
- goto out_unlock;
-
if (cma_cm_event_handler(id_priv, &work->event)) {
cma_id_put(id_priv);
destroy_id_handler_unlock(id_priv);
@@ -2683,6 +2682,8 @@ out_unlock:
mutex_unlock(&id_priv->handler_mutex);
cma_id_put(id_priv);
out_free:
+ if (work->event.event == RDMA_CM_EVENT_MULTICAST_JOIN)
+ rdma_destroy_ah_attr(&work->event.param.ud.ah_attr);
kfree(work);
}
@@ -3240,32 +3241,54 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
return rdma_bind_addr(id, src_addr);
}
-int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
- const struct sockaddr *dst_addr, unsigned long timeout_ms)
+/*
+ * If required, resolve the source address for bind and leave the id_priv in
+ * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior
+ * calls made by ULP, a previously bound ID will not be re-bound and src_addr is
+ * ignored.
+ */
+static int resolve_prepare_src(struct rdma_id_private *id_priv,
+ struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr)
{
- struct rdma_id_private *id_priv;
int ret;
- id_priv = container_of(id, struct rdma_id_private, id);
memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
- if (id_priv->state == RDMA_CM_IDLE) {
- ret = cma_bind_addr(id, src_addr, dst_addr);
- if (ret) {
- memset(cma_dst_addr(id_priv), 0,
- rdma_addr_size(dst_addr));
- return ret;
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
+ /* For a well behaved ULP state will be RDMA_CM_IDLE */
+ ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr);
+ if (ret)
+ goto err_dst;
+ if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
+ RDMA_CM_ADDR_QUERY))) {
+ ret = -EINVAL;
+ goto err_dst;
}
}
if (cma_family(id_priv) != dst_addr->sa_family) {
- memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
- return -EINVAL;
+ ret = -EINVAL;
+ goto err_state;
}
+ return 0;
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
- memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
- return -EINVAL;
- }
+err_state:
+ cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
+err_dst:
+ memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
+ return ret;
+}
+
+int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr, unsigned long timeout_ms)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+ int ret;
+
+ ret = resolve_prepare_src(id_priv, src_addr, dst_addr);
+ if (ret)
+ return ret;
if (cma_any_addr(dst_addr)) {
ret = cma_resolve_loopback(id_priv);
@@ -3297,7 +3320,8 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
id_priv = container_of(id, struct rdma_id_private, id);
spin_lock_irqsave(&id_priv->lock, flags);
- if (reuse || id_priv->state == RDMA_CM_IDLE) {
+ if ((reuse && id_priv->state != RDMA_CM_LISTEN) ||
+ id_priv->state == RDMA_CM_IDLE) {
id_priv->reuseaddr = reuse;
ret = 0;
} else {
@@ -3491,8 +3515,7 @@ static int cma_check_port(struct rdma_bind_list *bind_list,
if (id_priv == cur_id)
continue;
- if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr &&
- cur_id->reuseaddr)
+ if (reuseaddr && cur_id->reuseaddr)
continue;
cur_addr = cma_src_addr(cur_id);
@@ -3533,18 +3556,6 @@ static int cma_use_port(enum rdma_ucm_port_space ps,
return ret;
}
-static int cma_bind_listen(struct rdma_id_private *id_priv)
-{
- struct rdma_bind_list *bind_list = id_priv->bind_list;
- int ret = 0;
-
- mutex_lock(&lock);
- if (bind_list->owners.first->next)
- ret = cma_check_port(bind_list, id_priv, 0);
- mutex_unlock(&lock);
- return ret;
-}
-
static enum rdma_ucm_port_space
cma_select_inet_ps(struct rdma_id_private *id_priv)
{
@@ -3638,22 +3649,31 @@ static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
int rdma_listen(struct rdma_cm_id *id, int backlog)
{
- struct rdma_id_private *id_priv;
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
int ret;
- id_priv = container_of(id, struct rdma_id_private, id);
- if (id_priv->state == RDMA_CM_IDLE) {
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) {
+ /* For a well behaved ULP state will be RDMA_CM_IDLE */
id->route.addr.src_addr.ss_family = AF_INET;
ret = rdma_bind_addr(id, cma_src_addr(id_priv));
if (ret)
return ret;
+ if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
+ RDMA_CM_LISTEN)))
+ return -EINVAL;
}
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN))
- return -EINVAL;
-
+ /*
+ * Once the ID reaches RDMA_CM_LISTEN it is not allowed to be reusable
+ * any more, and has to be unique in the bind list.
+ */
if (id_priv->reuseaddr) {
- ret = cma_bind_listen(id_priv);
+ mutex_lock(&lock);
+ ret = cma_check_port(id_priv->bind_list, id_priv, 0);
+ if (!ret)
+ id_priv->reuseaddr = 0;
+ mutex_unlock(&lock);
if (ret)
goto err;
}
@@ -3678,6 +3698,10 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
return 0;
err:
id_priv->backlog = 0;
+ /*
+ * All the failure paths that lead here will not allow the req_handler's
+ * to have run.
+ */
cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND);
return ret;
}
@@ -3732,7 +3756,6 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
return 0;
err2:
- rdma_restrack_del(&id_priv->res);
if (id_priv->cma_dev)
cma_release_dev(id_priv);
err1:
@@ -3781,7 +3804,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
int ret;
mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state != RDMA_CM_CONNECT)
+ if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
goto out;
switch (ib_event->event) {
@@ -4017,12 +4040,15 @@ out:
int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
{
- struct rdma_id_private *id_priv;
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
int ret;
- id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT))
- return -EINVAL;
+ mutex_lock(&id_priv->handler_mutex);
+ if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) {
+ ret = -EINVAL;
+ goto err_unlock;
+ }
if (!id->qp) {
id_priv->qp_num = conn_param->qp_num;
@@ -4039,11 +4065,13 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
else
ret = -ENOSYS;
if (ret)
- goto err;
-
+ goto err_state;
+ mutex_unlock(&id_priv->handler_mutex);
return 0;
-err:
+err_state:
cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED);
+err_unlock:
+ mutex_unlock(&id_priv->handler_mutex);
return ret;
}
EXPORT_SYMBOL(rdma_connect);
@@ -4155,17 +4183,33 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
}
-int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
- const char *caller)
+/**
+ * rdma_accept - Called to accept a connection request or response.
+ * @id: Connection identifier associated with the request.
+ * @conn_param: Information needed to establish the connection. This must be
+ * provided if accepting a connection request. If accepting a connection
+ * response, this parameter must be NULL.
+ *
+ * Typically, this routine is only called by the listener to accept a connection
+ * request. It must also be called on the active side of a connection if the
+ * user is performing their own QP transitions.
+ *
+ * In the case of error, a reject message is sent to the remote side and the
+ * state of the qp associated with the id is modified to error, such that any
+ * previously posted receive buffers would be flushed.
+ *
+ * This function is for use by kernel ULPs and must be called from under the
+ * handler callback.
+ */
+int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
{
- struct rdma_id_private *id_priv;
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
int ret;
- id_priv = container_of(id, struct rdma_id_private, id);
-
- rdma_restrack_set_task(&id_priv->res, caller);
+ lockdep_assert_held(&id_priv->handler_mutex);
- if (!cma_comp(id_priv, RDMA_CM_CONNECT))
+ if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
return -EINVAL;
if (!id->qp && conn_param) {
@@ -4203,10 +4247,10 @@ reject:
rdma_reject(id, NULL, 0, IB_CM_REJ_CONSUMER_DEFINED);
return ret;
}
-EXPORT_SYMBOL(__rdma_accept);
+EXPORT_SYMBOL(rdma_accept);
-int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
- const char *caller, struct rdma_ucm_ece *ece)
+int rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+ struct rdma_ucm_ece *ece)
{
struct rdma_id_private *id_priv =
container_of(id, struct rdma_id_private, id);
@@ -4214,9 +4258,27 @@ int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
id_priv->ece.vendor_id = ece->vendor_id;
id_priv->ece.attr_mod = ece->attr_mod;
- return __rdma_accept(id, conn_param, caller);
+ return rdma_accept(id, conn_param);
+}
+EXPORT_SYMBOL(rdma_accept_ece);
+
+void rdma_lock_handler(struct rdma_cm_id *id)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ mutex_lock(&id_priv->handler_mutex);
}
-EXPORT_SYMBOL(__rdma_accept_ece);
+EXPORT_SYMBOL(rdma_lock_handler);
+
+void rdma_unlock_handler(struct rdma_cm_id *id)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ mutex_unlock(&id_priv->handler_mutex);
+}
+EXPORT_SYMBOL(rdma_unlock_handler);
int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
{
@@ -4299,63 +4361,66 @@ out:
}
EXPORT_SYMBOL(rdma_disconnect);
-static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
+static void cma_make_mc_event(int status, struct rdma_id_private *id_priv,
+ struct ib_sa_multicast *multicast,
+ struct rdma_cm_event *event,
+ struct cma_multicast *mc)
{
- struct rdma_id_private *id_priv;
- struct cma_multicast *mc = multicast->context;
- struct rdma_cm_event event = {};
- int ret = 0;
-
- id_priv = mc->id_priv;
- mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state != RDMA_CM_ADDR_BOUND &&
- id_priv->state != RDMA_CM_ADDR_RESOLVED)
- goto out;
+ struct rdma_dev_addr *dev_addr;
+ enum ib_gid_type gid_type;
+ struct net_device *ndev;
if (!status)
status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
else
pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n",
status);
- mutex_lock(&id_priv->qp_mutex);
- if (!status && id_priv->id.qp) {
- status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
- be16_to_cpu(multicast->rec.mlid));
- if (status)
- pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to attach QP. status %d\n",
- status);
+
+ event->status = status;
+ event->param.ud.private_data = mc->context;
+ if (status) {
+ event->event = RDMA_CM_EVENT_MULTICAST_ERROR;
+ return;
}
- mutex_unlock(&id_priv->qp_mutex);
- event.status = status;
- event.param.ud.private_data = mc->context;
- if (!status) {
- struct rdma_dev_addr *dev_addr =
- &id_priv->id.route.addr.dev_addr;
- struct net_device *ndev =
- dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
- enum ib_gid_type gid_type =
- id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
- rdma_start_port(id_priv->cma_dev->device)];
-
- event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
- ret = ib_init_ah_from_mcmember(id_priv->id.device,
- id_priv->id.port_num,
- &multicast->rec,
- ndev, gid_type,
- &event.param.ud.ah_attr);
- if (ret)
- event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
+ dev_addr = &id_priv->id.route.addr.dev_addr;
+ ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
+ gid_type =
+ id_priv->cma_dev
+ ->default_gid_type[id_priv->id.port_num -
+ rdma_start_port(
+ id_priv->cma_dev->device)];
+
+ event->event = RDMA_CM_EVENT_MULTICAST_JOIN;
+ if (ib_init_ah_from_mcmember(id_priv->id.device, id_priv->id.port_num,
+ &multicast->rec, ndev, gid_type,
+ &event->param.ud.ah_attr)) {
+ event->event = RDMA_CM_EVENT_MULTICAST_ERROR;
+ goto out;
+ }
- event.param.ud.qp_num = 0xFFFFFF;
- event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
- if (ndev)
- dev_put(ndev);
- } else
- event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
+ event->param.ud.qp_num = 0xFFFFFF;
+ event->param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
- ret = cma_cm_event_handler(id_priv, &event);
+out:
+ if (ndev)
+ dev_put(ndev);
+}
+static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
+{
+ struct cma_multicast *mc = multicast->context;
+ struct rdma_id_private *id_priv = mc->id_priv;
+ struct rdma_cm_event event = {};
+ int ret = 0;
+
+ mutex_lock(&id_priv->handler_mutex);
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL ||
+ READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING)
+ goto out;
+
+ cma_make_mc_event(status, id_priv, multicast, &event, mc);
+ ret = cma_cm_event_handler(id_priv, &event);
rdma_destroy_ah_attr(&event.param.ud.ah_attr);
if (ret) {
destroy_id_handler_unlock(id_priv);
@@ -4445,23 +4510,10 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
IB_SA_MCMEMBER_REC_MTU |
IB_SA_MCMEMBER_REC_HOP_LIMIT;
- mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
- id_priv->id.port_num, &rec,
- comp_mask, GFP_KERNEL,
- cma_ib_mc_handler, mc);
- return PTR_ERR_OR_ZERO(mc->multicast.ib);
-}
-
-static void iboe_mcast_work_handler(struct work_struct *work)
-{
- struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
- struct cma_multicast *mc = mw->mc;
- struct ib_sa_multicast *m = mc->multicast.ib;
-
- mc->multicast.ib->context = mc;
- cma_ib_mc_handler(0, m);
- kref_put(&mc->mcref, release_mc);
- kfree(mw);
+ mc->sa_mc = ib_sa_join_multicast(&sa_client, id_priv->id.device,
+ id_priv->id.port_num, &rec, comp_mask,
+ GFP_KERNEL, cma_ib_mc_handler, mc);
+ return PTR_ERR_OR_ZERO(mc->sa_mc);
}
static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
@@ -4496,52 +4548,47 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
struct cma_multicast *mc)
{
- struct iboe_mcast_work *work;
+ struct cma_work *work;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
int err = 0;
struct sockaddr *addr = (struct sockaddr *)&mc->addr;
struct net_device *ndev = NULL;
+ struct ib_sa_multicast ib;
enum ib_gid_type gid_type;
bool send_only;
send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
- if (cma_zero_addr((struct sockaddr *)&mc->addr))
+ if (cma_zero_addr(addr))
return -EINVAL;
work = kzalloc(sizeof *work, GFP_KERNEL);
if (!work)
return -ENOMEM;
- mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
- if (!mc->multicast.ib) {
- err = -ENOMEM;
- goto out1;
- }
-
gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
rdma_start_port(id_priv->cma_dev->device)];
- cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type);
+ cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type);
- mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
+ ib.rec.pkey = cpu_to_be16(0xffff);
if (id_priv->id.ps == RDMA_PS_UDP)
- mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
+ ib.rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
if (dev_addr->bound_dev_if)
ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
if (!ndev) {
err = -ENODEV;
- goto out2;
+ goto err_free;
}
- mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
- mc->multicast.ib->rec.hop_limit = 1;
- mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
+ ib.rec.rate = iboe_get_rate(ndev);
+ ib.rec.hop_limit = 1;
+ ib.rec.mtu = iboe_get_mtu(ndev->mtu);
if (addr->sa_family == AF_INET) {
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
- mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
+ ib.rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
if (!send_only) {
- err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
+ err = cma_igmp_send(ndev, &ib.rec.mgid,
true);
}
}
@@ -4550,24 +4597,22 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
err = -ENOTSUPP;
}
dev_put(ndev);
- if (err || !mc->multicast.ib->rec.mtu) {
+ if (err || !ib.rec.mtu) {
if (!err)
err = -EINVAL;
- goto out2;
+ goto err_free;
}
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
- &mc->multicast.ib->rec.port_gid);
+ &ib.rec.port_gid);
work->id = id_priv;
- work->mc = mc;
- INIT_WORK(&work->work, iboe_mcast_work_handler);
- kref_get(&mc->mcref);
+ INIT_WORK(&work->work, cma_work_handler);
+ cma_make_mc_event(0, id_priv, &ib, &work->event, mc);
+ /* Balances with cma_id_put() in cma_work_handler */
+ cma_id_get(id_priv);
queue_work(cma_wq, &work->work);
-
return 0;
-out2:
- kfree(mc->multicast.ib);
-out1:
+err_free:
kfree(work);
return err;
}
@@ -4575,19 +4620,21 @@ out1:
int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
u8 join_state, void *context)
{
- struct rdma_id_private *id_priv;
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
struct cma_multicast *mc;
int ret;
- if (!id->device)
+ /* Not supported for kernel QPs */
+ if (WARN_ON(id->qp))
return -EINVAL;
- id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
- !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
+ /* ULP is calling this wrong. */
+ if (!id->device || (READ_ONCE(id_priv->state) != RDMA_CM_ADDR_BOUND &&
+ READ_ONCE(id_priv->state) != RDMA_CM_ADDR_RESOLVED))
return -EINVAL;
- mc = kmalloc(sizeof *mc, GFP_KERNEL);
+ mc = kzalloc(sizeof(*mc), GFP_KERNEL);
if (!mc)
return -ENOMEM;
@@ -4597,7 +4644,6 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
mc->join_state = join_state;
if (rdma_protocol_roce(id->device, id->port_num)) {
- kref_init(&mc->mcref);
ret = cma_iboe_join_multicast(id_priv, mc);
if (ret)
goto out_err;
@@ -4629,25 +4675,14 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
id_priv = container_of(id, struct rdma_id_private, id);
spin_lock_irq(&id_priv->lock);
list_for_each_entry(mc, &id_priv->mc_list, list) {
- if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
- list_del(&mc->list);
- spin_unlock_irq(&id_priv->lock);
-
- if (id->qp)
- ib_detach_mcast(id->qp,
- &mc->multicast.ib->rec.mgid,
- be16_to_cpu(mc->multicast.ib->rec.mlid));
-
- BUG_ON(id_priv->cma_dev->device != id->device);
-
- if (rdma_cap_ib_mcast(id->device, id->port_num)) {
- ib_sa_free_multicast(mc->multicast.ib);
- kfree(mc);
- } else if (rdma_protocol_roce(id->device, id->port_num)) {
- cma_leave_roce_mc_group(id_priv, mc);
- }
- return;
- }
+ if (memcmp(&mc->addr, addr, rdma_addr_size(addr)) != 0)
+ continue;
+ list_del(&mc->list);
+ spin_unlock_irq(&id_priv->lock);
+
+ WARN_ON(id_priv->cma_dev->device != id->device);
+ destroy_mc(id_priv, mc);
+ return;
}
spin_unlock_irq(&id_priv->lock);
}
@@ -4656,7 +4691,7 @@ EXPORT_SYMBOL(rdma_leave_multicast);
static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
{
struct rdma_dev_addr *dev_addr;
- struct cma_ndev_work *work;
+ struct cma_work *work;
dev_addr = &id_priv->id.route.addr.dev_addr;
@@ -4669,7 +4704,7 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id
if (!work)
return -ENOMEM;
- INIT_WORK(&work->work, cma_ndev_work_handler);
+ INIT_WORK(&work->work, cma_work_handler);
work->id = id_priv;
work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
cma_id_get(id_priv);
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 3c1e2ca564fe..7ec4af2ed87a 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -123,16 +123,17 @@ static ssize_t default_roce_mode_store(struct config_item *item,
{
struct cma_device *cma_dev;
struct cma_dev_port_group *group;
- int gid_type = ib_cache_gid_parse_type_str(buf);
+ int gid_type;
ssize_t ret;
- if (gid_type < 0)
- return -EINVAL;
-
ret = cma_configfs_params_get(item, &cma_dev, &group);
if (ret)
return ret;
+ gid_type = ib_cache_gid_parse_type_str(buf);
+ if (gid_type < 0)
+ return -EINVAL;
+
ret = cma_set_default_gid_type(cma_dev, group->port_num, gid_type);
cma_configfs_params_put(cma_dev);
diff --git a/drivers/infiniband/core/cma_trace.h b/drivers/infiniband/core/cma_trace.h
index e6e20c36c538..e45264267bcc 100644
--- a/drivers/infiniband/core/cma_trace.h
+++ b/drivers/infiniband/core/cma_trace.h
@@ -17,46 +17,6 @@
#include <linux/tracepoint.h>
#include <trace/events/rdma.h>
-/*
- * enum ib_cm_event_type, from include/rdma/ib_cm.h
- */
-#define IB_CM_EVENT_LIST \
- ib_cm_event(REQ_ERROR) \
- ib_cm_event(REQ_RECEIVED) \
- ib_cm_event(REP_ERROR) \
- ib_cm_event(REP_RECEIVED) \
- ib_cm_event(RTU_RECEIVED) \
- ib_cm_event(USER_ESTABLISHED) \
- ib_cm_event(DREQ_ERROR) \
- ib_cm_event(DREQ_RECEIVED) \
- ib_cm_event(DREP_RECEIVED) \
- ib_cm_event(TIMEWAIT_EXIT) \
- ib_cm_event(MRA_RECEIVED) \
- ib_cm_event(REJ_RECEIVED) \
- ib_cm_event(LAP_ERROR) \
- ib_cm_event(LAP_RECEIVED) \
- ib_cm_event(APR_RECEIVED) \
- ib_cm_event(SIDR_REQ_ERROR) \
- ib_cm_event(SIDR_REQ_RECEIVED) \
- ib_cm_event_end(SIDR_REP_RECEIVED)
-
-#undef ib_cm_event
-#undef ib_cm_event_end
-
-#define ib_cm_event(x) TRACE_DEFINE_ENUM(IB_CM_##x);
-#define ib_cm_event_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
-
-IB_CM_EVENT_LIST
-
-#undef ib_cm_event
-#undef ib_cm_event_end
-
-#define ib_cm_event(x) { IB_CM_##x, #x },
-#define ib_cm_event_end(x) { IB_CM_##x, #x }
-
-#define rdma_show_ib_cm_event(x) \
- __print_symbolic(x, IB_CM_EVENT_LIST)
-
DECLARE_EVENT_CLASS(cma_fsm_class,
TP_PROTO(
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index a1e6a67b2c4a..e84b0fedaacb 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -44,6 +44,7 @@
#include <rdma/ib_mad.h>
#include <rdma/restrack.h>
#include "mad_priv.h"
+#include "restrack.h"
/* Total number of ports combined across all struct ib_devices's */
#define RDMA_MAX_PORTS 8192
@@ -352,6 +353,7 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
INIT_LIST_HEAD(&qp->rdma_mrs);
INIT_LIST_HEAD(&qp->sig_mrs);
+ rdma_restrack_new(&qp->res, RDMA_RESTRACK_QP);
/*
* We don't track XRC QPs for now, because they don't have PD
* and more importantly they are created internaly by driver,
@@ -359,14 +361,9 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
*/
is_xrc = qp_type == IB_QPT_XRC_INI || qp_type == IB_QPT_XRC_TGT;
if ((qp_type < IB_QPT_MAX && !is_xrc) || qp_type == IB_QPT_DRIVER) {
- qp->res.type = RDMA_RESTRACK_QP;
- if (uobj)
- rdma_restrack_uadd(&qp->res);
- else
- rdma_restrack_kadd(&qp->res);
- } else
- qp->res.valid = false;
-
+ rdma_restrack_parent_name(&qp->res, &pd->res);
+ rdma_restrack_add(&qp->res);
+ }
return qp;
}
diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
index 636166880442..e4ff0d3328b6 100644
--- a/drivers/infiniband/core/counters.c
+++ b/drivers/infiniband/core/counters.c
@@ -80,8 +80,9 @@ static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port,
counter->device = dev;
counter->port = port;
- counter->res.type = RDMA_RESTRACK_COUNTER;
- counter->stats = dev->ops.counter_alloc_stats(counter);
+
+ rdma_restrack_new(&counter->res, RDMA_RESTRACK_COUNTER);
+ counter->stats = dev->ops.counter_alloc_stats(counter);
if (!counter->stats)
goto err_stats;
@@ -107,6 +108,7 @@ err_mode:
mutex_unlock(&port_counter->lock);
kfree(counter->stats);
err_stats:
+ rdma_restrack_put(&counter->res);
kfree(counter);
return NULL;
}
@@ -248,13 +250,8 @@ next:
static void rdma_counter_res_add(struct rdma_counter *counter,
struct ib_qp *qp)
{
- if (rdma_is_kernel_res(&qp->res)) {
- rdma_restrack_set_task(&counter->res, qp->res.kern_name);
- rdma_restrack_kadd(&counter->res);
- } else {
- rdma_restrack_attach_task(&counter->res, qp->res.task);
- rdma_restrack_uadd(&counter->res);
- }
+ rdma_restrack_parent_name(&counter->res, &qp->res);
+ rdma_restrack_add(&counter->res);
}
static void counter_release(struct kref *kref)
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index a92fc3f90bb5..12ebacf52958 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -197,24 +197,22 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
}
/**
- * __ib_alloc_cq_user - allocate a completion queue
+ * __ib_alloc_cq allocate a completion queue
* @dev: device to allocate the CQ for
* @private: driver private data, accessible from cq->cq_context
* @nr_cqe: number of CQEs to allocate
* @comp_vector: HCA completion vectors for this CQ
* @poll_ctx: context to poll the CQ from.
* @caller: module owner name.
- * @udata: Valid user data or NULL for kernel object
*
* This is the proper interface to allocate a CQ for in-kernel users. A
* CQ allocated with this interface will automatically be polled from the
* specified context. The ULP must use wr->wr_cqe instead of wr->wr_id
* to use this CQ abstraction.
*/
-struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
- int nr_cqe, int comp_vector,
- enum ib_poll_context poll_ctx,
- const char *caller, struct ib_udata *udata)
+struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe,
+ int comp_vector, enum ib_poll_context poll_ctx,
+ const char *caller)
{
struct ib_cq_init_attr cq_attr = {
.cqe = nr_cqe,
@@ -237,15 +235,13 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
if (!cq->wc)
goto out_free_cq;
- cq->res.type = RDMA_RESTRACK_CQ;
- rdma_restrack_set_task(&cq->res, caller);
+ rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+ rdma_restrack_set_name(&cq->res, caller);
ret = dev->ops.create_cq(cq, &cq_attr, NULL);
if (ret)
goto out_free_wc;
- rdma_restrack_kadd(&cq->res);
-
rdma_dim_init(cq);
switch (cq->poll_ctx) {
@@ -271,21 +267,22 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
goto out_destroy_cq;
}
+ rdma_restrack_add(&cq->res);
trace_cq_alloc(cq, nr_cqe, comp_vector, poll_ctx);
return cq;
out_destroy_cq:
rdma_dim_destroy(cq);
- rdma_restrack_del(&cq->res);
- cq->device->ops.destroy_cq(cq, udata);
+ cq->device->ops.destroy_cq(cq, NULL);
out_free_wc:
+ rdma_restrack_put(&cq->res);
kfree(cq->wc);
out_free_cq:
kfree(cq);
trace_cq_alloc_error(nr_cqe, comp_vector, poll_ctx, ret);
return ERR_PTR(ret);
}
-EXPORT_SYMBOL(__ib_alloc_cq_user);
+EXPORT_SYMBOL(__ib_alloc_cq);
/**
* __ib_alloc_cq_any - allocate a completion queue
@@ -310,18 +307,19 @@ struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
atomic_inc_return(&counter) %
min_t(int, dev->num_comp_vectors, num_online_cpus());
- return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
- caller, NULL);
+ return __ib_alloc_cq(dev, private, nr_cqe, comp_vector, poll_ctx,
+ caller);
}
EXPORT_SYMBOL(__ib_alloc_cq_any);
/**
- * ib_free_cq_user - free a completion queue
+ * ib_free_cq - free a completion queue
* @cq: completion queue to free.
- * @udata: User data or NULL for kernel object
*/
-void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
+void ib_free_cq(struct ib_cq *cq)
{
+ int ret;
+
if (WARN_ON_ONCE(atomic_read(&cq->usecnt)))
return;
if (WARN_ON_ONCE(cq->cqe_used))
@@ -343,12 +341,13 @@ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
rdma_dim_destroy(cq);
trace_cq_free(cq);
+ ret = cq->device->ops.destroy_cq(cq, NULL);
+ WARN_ONCE(ret, "Destroy of kernel CQ shouldn't fail");
rdma_restrack_del(&cq->res);
- cq->device->ops.destroy_cq(cq, udata);
kfree(cq->wc);
kfree(cq);
}
-EXPORT_SYMBOL(ib_free_cq_user);
+EXPORT_SYMBOL(ib_free_cq);
void ib_cq_pool_init(struct ib_device *dev)
{
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 23ee65a9185f..a3b1fc84cdca 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -1177,58 +1177,23 @@ out:
return ret;
}
-static void setup_dma_device(struct ib_device *device)
+static void setup_dma_device(struct ib_device *device,
+ struct device *dma_device)
{
- struct device *parent = device->dev.parent;
-
- WARN_ON_ONCE(device->dma_device);
-
-#ifdef CONFIG_DMA_OPS
- if (device->dev.dma_ops) {
- /*
- * The caller provided custom DMA operations. Copy the
- * DMA-related fields that are used by e.g. dma_alloc_coherent()
- * into device->dev.
- */
- device->dma_device = &device->dev;
- if (!device->dev.dma_mask) {
- if (parent)
- device->dev.dma_mask = parent->dma_mask;
- else
- WARN_ON_ONCE(true);
- }
- if (!device->dev.coherent_dma_mask) {
- if (parent)
- device->dev.coherent_dma_mask =
- parent->coherent_dma_mask;
- else
- WARN_ON_ONCE(true);
- }
- } else
-#endif /* CONFIG_DMA_OPS */
- {
- /*
- * The caller did not provide custom DMA operations. Use the
- * DMA mapping operations of the parent device.
- */
- WARN_ON_ONCE(!parent);
- device->dma_device = parent;
- }
-
- if (!device->dev.dma_parms) {
- if (parent) {
- /*
- * The caller did not provide DMA parameters, so
- * 'parent' probably represents a PCI device. The PCI
- * core sets the maximum segment size to 64
- * KB. Increase this parameter to 2 GB.
- */
- device->dev.dma_parms = parent->dma_parms;
- dma_set_max_seg_size(device->dma_device, SZ_2G);
- } else {
- WARN_ON_ONCE(true);
- }
+ /*
+ * If the caller does not provide a DMA capable device then the IB
+ * device will be used. In this case the caller should fully setup the
+ * ibdev for DMA. This usually means using dma_virt_ops.
+ */
+#ifdef CONFIG_DMA_VIRT_OPS
+ if (!dma_device) {
+ device->dev.dma_ops = &dma_virt_ops;
+ dma_device = &device->dev;
}
+#endif
+ WARN_ON(!dma_device);
+ device->dma_device = dma_device;
+ WARN_ON(!device->dma_device->dma_parms);
}
/*
@@ -1241,7 +1206,6 @@ static int setup_device(struct ib_device *device)
struct ib_udata uhw = {.outlen = 0, .inlen = 0};
int ret;
- setup_dma_device(device);
ib_device_check_mandatory(device);
ret = setup_port_data(device);
@@ -1354,7 +1318,10 @@ static void prevent_dealloc_device(struct ib_device *ib_dev)
* ib_register_device - Register an IB device with IB core
* @device: Device to register
* @name: unique string device name. This may include a '%' which will
- * cause a unique index to be added to the passed device name.
+ * cause a unique index to be added to the passed device name.
+ * @dma_device: pointer to a DMA-capable device. If %NULL, then the IB
+ * device will be used. In this case the caller should fully
+ * setup the ibdev for DMA. This usually means using dma_virt_ops.
*
* Low-level drivers use ib_register_device() to register their
* devices with the IB core. All registered clients will receive a
@@ -1365,7 +1332,8 @@ static void prevent_dealloc_device(struct ib_device *ib_dev)
* asynchronously then the device pointer may become freed as soon as this
* function returns.
*/
-int ib_register_device(struct ib_device *device, const char *name)
+int ib_register_device(struct ib_device *device, const char *name,
+ struct device *dma_device)
{
int ret;
@@ -1373,6 +1341,7 @@ int ib_register_device(struct ib_device *device, const char *name)
if (ret)
return ret;
+ setup_dma_device(device, dma_device);
ret = setup_device(device);
if (ret)
return ret;
@@ -2697,7 +2666,9 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_OBJ_SIZE(dev_ops, ib_ah);
SET_OBJ_SIZE(dev_ops, ib_counters);
SET_OBJ_SIZE(dev_ops, ib_cq);
+ SET_OBJ_SIZE(dev_ops, ib_mw);
SET_OBJ_SIZE(dev_ops, ib_pd);
+ SET_OBJ_SIZE(dev_ops, ib_rwq_ind_table);
SET_OBJ_SIZE(dev_ops, ib_srq);
SET_OBJ_SIZE(dev_ops, ib_ucontext);
SET_OBJ_SIZE(dev_ops, ib_xrcd);
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 6d3ed7c6e19e..ffe11b03724c 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -130,17 +130,6 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
lockdep_assert_held(&ufile->hw_destroy_rwsem);
assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE);
- if (reason == RDMA_REMOVE_ABORT_HWOBJ) {
- reason = RDMA_REMOVE_ABORT;
- ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason,
- attrs);
- /*
- * Drivers are not permitted to ignore RDMA_REMOVE_ABORT, see
- * ib_is_destroy_retryable, cleanup_retryable == false here.
- */
- WARN_ON(ret);
- }
-
if (reason == RDMA_REMOVE_ABORT) {
WARN_ON(!list_empty(&uobj->list));
WARN_ON(!uobj->context);
@@ -674,11 +663,22 @@ void rdma_alloc_abort_uobject(struct ib_uobject *uobj,
bool hw_obj_valid)
{
struct ib_uverbs_file *ufile = uobj->ufile;
+ int ret;
+
+ if (hw_obj_valid) {
+ ret = uobj->uapi_object->type_class->destroy_hw(
+ uobj, RDMA_REMOVE_ABORT, attrs);
+ /*
+ * If the driver couldn't destroy the object then go ahead and
+ * commit it. Leaking objects that can't be destroyed is only
+ * done during FD close after the driver has a few more tries to
+ * destroy it.
+ */
+ if (WARN_ON(ret))
+ return rdma_alloc_commit_uobject(uobj, attrs);
+ }
- uverbs_destroy_uobject(uobj,
- hw_obj_valid ? RDMA_REMOVE_ABORT_HWOBJ :
- RDMA_REMOVE_ABORT,
- attrs);
+ uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs);
/* Matches the down_read in rdma_alloc_begin_uobject */
up_read(&ufile->hw_destroy_rwsem);
@@ -889,14 +889,14 @@ void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
if (!ufile->ucontext)
goto done;
- ufile->ucontext->closing = true;
ufile->ucontext->cleanup_retryable = true;
while (!list_empty(&ufile->uobjects))
if (__uverbs_cleanup_ufile(ufile, reason)) {
/*
* No entry was cleaned-up successfully during this
- * iteration
+ * iteration. It is a driver bug to fail destruction.
*/
+ WARN_ON(!list_empty(&ufile->uobjects));
break;
}
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 62fbb0ae9cb4..4aeeaaed0f17 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -123,32 +123,6 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type)
}
EXPORT_SYMBOL(rdma_restrack_count);
-static void set_kern_name(struct rdma_restrack_entry *res)
-{
- struct ib_pd *pd;
-
- switch (res->type) {
- case RDMA_RESTRACK_QP:
- pd = container_of(res, struct ib_qp, res)->pd;
- if (!pd) {
- WARN_ONCE(true, "XRC QPs are not supported\n");
- /* Survive, despite the programmer's error */
- res->kern_name = " ";
- }
- break;
- case RDMA_RESTRACK_MR:
- pd = container_of(res, struct ib_mr, res)->pd;
- break;
- default:
- /* Other types set kern_name directly */
- pd = NULL;
- break;
- }
-
- if (pd)
- res->kern_name = pd->res.kern_name;
-}
-
static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
{
switch (res->type) {
@@ -173,36 +147,77 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
}
}
-void rdma_restrack_set_task(struct rdma_restrack_entry *res,
- const char *caller)
+/**
+ * rdma_restrack_attach_task() - attach the task onto this resource,
+ * valid for user space restrack entries.
+ * @res: resource entry
+ * @task: the task to attach
+ */
+static void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
+ struct task_struct *task)
{
- if (caller) {
- res->kern_name = caller;
+ if (WARN_ON_ONCE(!task))
return;
- }
if (res->task)
put_task_struct(res->task);
- get_task_struct(current);
- res->task = current;
+ get_task_struct(task);
+ res->task = task;
+ res->user = true;
}
-EXPORT_SYMBOL(rdma_restrack_set_task);
/**
- * rdma_restrack_attach_task() - attach the task onto this resource
+ * rdma_restrack_set_name() - set the task for this resource
* @res: resource entry
- * @task: the task to attach, the current task will be used if it is NULL.
+ * @caller: kernel name, the current task will be used if the caller is NULL.
*/
-void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
- struct task_struct *task)
+void rdma_restrack_set_name(struct rdma_restrack_entry *res, const char *caller)
{
- if (res->task)
- put_task_struct(res->task);
- get_task_struct(task);
- res->task = task;
+ if (caller) {
+ res->kern_name = caller;
+ return;
+ }
+
+ rdma_restrack_attach_task(res, current);
+}
+EXPORT_SYMBOL(rdma_restrack_set_name);
+
+/**
+ * rdma_restrack_parent_name() - set the restrack name properties based
+ * on parent restrack
+ * @dst: destination resource entry
+ * @parent: parent resource entry
+ */
+void rdma_restrack_parent_name(struct rdma_restrack_entry *dst,
+ const struct rdma_restrack_entry *parent)
+{
+ if (rdma_is_kernel_res(parent))
+ dst->kern_name = parent->kern_name;
+ else
+ rdma_restrack_attach_task(dst, parent->task);
+}
+EXPORT_SYMBOL(rdma_restrack_parent_name);
+
+/**
+ * rdma_restrack_new() - Initializes new restrack entry to allow _put() interface
+ * to release memory in fully automatic way.
+ * @res - Entry to initialize
+ * @type - REstrack type
+ */
+void rdma_restrack_new(struct rdma_restrack_entry *res,
+ enum rdma_restrack_type type)
+{
+ kref_init(&res->kref);
+ init_completion(&res->comp);
+ res->type = type;
}
+EXPORT_SYMBOL(rdma_restrack_new);
-static void rdma_restrack_add(struct rdma_restrack_entry *res)
+/**
+ * rdma_restrack_add() - add object to the reource tracking database
+ * @res: resource entry
+ */
+void rdma_restrack_add(struct rdma_restrack_entry *res)
{
struct ib_device *dev = res_to_dev(res);
struct rdma_restrack_root *rt;
@@ -213,8 +228,6 @@ static void rdma_restrack_add(struct rdma_restrack_entry *res)
rt = &dev->res[res->type];
- kref_init(&res->kref);
- init_completion(&res->comp);
if (res->type == RDMA_RESTRACK_QP) {
/* Special case to ensure that LQPN points to right QP */
struct ib_qp *qp = container_of(res, struct ib_qp, res);
@@ -236,38 +249,7 @@ static void rdma_restrack_add(struct rdma_restrack_entry *res)
if (!ret)
res->valid = true;
}
-
-/**
- * rdma_restrack_kadd() - add kernel object to the reource tracking database
- * @res: resource entry
- */
-void rdma_restrack_kadd(struct rdma_restrack_entry *res)
-{
- res->task = NULL;
- set_kern_name(res);
- res->user = false;
- rdma_restrack_add(res);
-}
-EXPORT_SYMBOL(rdma_restrack_kadd);
-
-/**
- * rdma_restrack_uadd() - add user object to the reource tracking database
- * @res: resource entry
- */
-void rdma_restrack_uadd(struct rdma_restrack_entry *res)
-{
- if ((res->type != RDMA_RESTRACK_CM_ID) &&
- (res->type != RDMA_RESTRACK_COUNTER))
- res->task = NULL;
-
- if (!res->task)
- rdma_restrack_set_task(res, NULL);
- res->kern_name = NULL;
-
- res->user = true;
- rdma_restrack_add(res);
-}
-EXPORT_SYMBOL(rdma_restrack_uadd);
+EXPORT_SYMBOL(rdma_restrack_add);
int __must_check rdma_restrack_get(struct rdma_restrack_entry *res)
{
@@ -305,6 +287,10 @@ static void restrack_release(struct kref *kref)
struct rdma_restrack_entry *res;
res = container_of(kref, struct rdma_restrack_entry, kref);
+ if (res->task) {
+ put_task_struct(res->task);
+ res->task = NULL;
+ }
complete(&res->comp);
}
@@ -314,14 +300,23 @@ int rdma_restrack_put(struct rdma_restrack_entry *res)
}
EXPORT_SYMBOL(rdma_restrack_put);
+/**
+ * rdma_restrack_del() - delete object from the reource tracking database
+ * @res: resource entry
+ */
void rdma_restrack_del(struct rdma_restrack_entry *res)
{
struct rdma_restrack_entry *old;
struct rdma_restrack_root *rt;
struct ib_device *dev;
- if (!res->valid)
- goto out;
+ if (!res->valid) {
+ if (res->task) {
+ put_task_struct(res->task);
+ res->task = NULL;
+ }
+ return;
+ }
dev = res_to_dev(res);
if (WARN_ON(!dev))
@@ -330,16 +325,12 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
rt = &dev->res[res->type];
old = xa_erase(&rt->xa, res->id);
+ if (res->type == RDMA_RESTRACK_MR || res->type == RDMA_RESTRACK_QP)
+ return;
WARN_ON(old != res);
res->valid = false;
rdma_restrack_put(res);
wait_for_completion(&res->comp);
-
-out:
- if (res->task) {
- put_task_struct(res->task);
- res->task = NULL;
- }
}
EXPORT_SYMBOL(rdma_restrack_del);
diff --git a/drivers/infiniband/core/restrack.h b/drivers/infiniband/core/restrack.h
index d084e5f89849..6a04fc41f738 100644
--- a/drivers/infiniband/core/restrack.h
+++ b/drivers/infiniband/core/restrack.h
@@ -25,6 +25,12 @@ struct rdma_restrack_root {
int rdma_restrack_init(struct ib_device *dev);
void rdma_restrack_clean(struct ib_device *dev);
-void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
- struct task_struct *task);
+void rdma_restrack_add(struct rdma_restrack_entry *res);
+void rdma_restrack_del(struct rdma_restrack_entry *res);
+void rdma_restrack_new(struct rdma_restrack_entry *res,
+ enum rdma_restrack_type type);
+void rdma_restrack_set_name(struct rdma_restrack_entry *res,
+ const char *caller);
+void rdma_restrack_parent_name(struct rdma_restrack_entry *dst,
+ const struct rdma_restrack_entry *parent);
#endif /* _RDMA_CORE_RESTRACK_H_ */
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index c11e50510e49..914cddea525d 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -59,7 +59,7 @@ struct ib_port {
struct gid_attr_group *gid_attr_group;
struct attribute_group gid_group;
struct attribute_group *pkey_group;
- struct attribute_group *pma_table;
+ const struct attribute_group *pma_table;
struct attribute_group *hw_stats_ag;
struct rdma_hw_stats *hw_stats;
u8 port_num;
@@ -387,7 +387,8 @@ static ssize_t _show_port_gid_attr(
gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index);
if (IS_ERR(gid_attr))
- return PTR_ERR(gid_attr);
+ /* -EINVAL is returned for user space compatibility reasons. */
+ return -EINVAL;
ret = print(gid_attr, buf);
rdma_put_gid_attr(gid_attr);
@@ -653,17 +654,17 @@ static struct attribute *pma_attrs_noietf[] = {
NULL
};
-static struct attribute_group pma_group = {
+static const struct attribute_group pma_group = {
.name = "counters",
.attrs = pma_attrs
};
-static struct attribute_group pma_group_ext = {
+static const struct attribute_group pma_group_ext = {
.name = "counters",
.attrs = pma_attrs_ext
};
-static struct attribute_group pma_group_noietf = {
+static const struct attribute_group pma_group_noietf = {
.name = "counters",
.attrs = pma_attrs_noietf
};
@@ -778,8 +779,8 @@ err:
* Figure out which counter table to use depending on
* the device capabilities.
*/
-static struct attribute_group *get_counter_table(struct ib_device *dev,
- int port_num)
+static const struct attribute_group *get_counter_table(struct ib_device *dev,
+ int port_num)
{
struct ib_class_port_info cpi;
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 1d184ea05eba..ffe2563ad345 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -80,7 +80,6 @@ struct ucma_file {
struct list_head ctx_list;
struct list_head event_list;
wait_queue_head_t poll_wait;
- struct workqueue_struct *close_wq;
};
struct ucma_context {
@@ -88,7 +87,7 @@ struct ucma_context {
struct completion comp;
refcount_t ref;
int events_reported;
- int backlog;
+ atomic_t backlog;
struct ucma_file *file;
struct rdma_cm_id *cm_id;
@@ -96,11 +95,6 @@ struct ucma_context {
u64 uid;
struct list_head list;
- struct list_head mc_list;
- /* mark that device is in process of destroying the internal HW
- * resources, protected by the ctx_table lock
- */
- int closing;
/* sync between removal event and id destroy, protected by file mut */
int destroying;
struct work_struct close_work;
@@ -113,23 +107,22 @@ struct ucma_multicast {
u64 uid;
u8 join_state;
- struct list_head list;
struct sockaddr_storage addr;
};
struct ucma_event {
struct ucma_context *ctx;
+ struct ucma_context *conn_req_ctx;
struct ucma_multicast *mc;
struct list_head list;
- struct rdma_cm_id *cm_id;
struct rdma_ucm_event_resp resp;
- struct work_struct close_work;
};
static DEFINE_XARRAY_ALLOC(ctx_table);
static DEFINE_XARRAY_ALLOC(multicast_table);
static const struct file_operations ucma_fops;
+static int __destroy_id(struct ucma_context *ctx);
static inline struct ucma_context *_ucma_find_context(int id,
struct ucma_file *file)
@@ -139,7 +132,7 @@ static inline struct ucma_context *_ucma_find_context(int id,
ctx = xa_load(&ctx_table, id);
if (!ctx)
ctx = ERR_PTR(-ENOENT);
- else if (ctx->file != file || !ctx->cm_id)
+ else if (ctx->file != file)
ctx = ERR_PTR(-EINVAL);
return ctx;
}
@@ -150,12 +143,9 @@ static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
xa_lock(&ctx_table);
ctx = _ucma_find_context(id, file);
- if (!IS_ERR(ctx)) {
- if (ctx->closing)
- ctx = ERR_PTR(-EIO);
- else
- refcount_inc(&ctx->ref);
- }
+ if (!IS_ERR(ctx))
+ if (!refcount_inc_not_zero(&ctx->ref))
+ ctx = ERR_PTR(-ENXIO);
xa_unlock(&ctx_table);
return ctx;
}
@@ -183,14 +173,6 @@ static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id)
return ctx;
}
-static void ucma_close_event_id(struct work_struct *work)
-{
- struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work);
-
- rdma_destroy_id(uevent_close->cm_id);
- kfree(uevent_close);
-}
-
static void ucma_close_id(struct work_struct *work)
{
struct ucma_context *ctx = container_of(work, struct ucma_context, close_work);
@@ -203,6 +185,14 @@ static void ucma_close_id(struct work_struct *work)
wait_for_completion(&ctx->comp);
/* No new events will be generated after destroying the id. */
rdma_destroy_id(ctx->cm_id);
+
+ /*
+ * At this point ctx->ref is zero so the only place the ctx can be is in
+ * a uevent or in __destroy_id(). Since the former doesn't touch
+ * ctx->cm_id and the latter sync cancels this, there is no races with
+ * this store.
+ */
+ ctx->cm_id = NULL;
}
static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
@@ -216,39 +206,23 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
INIT_WORK(&ctx->close_work, ucma_close_id);
refcount_set(&ctx->ref, 1);
init_completion(&ctx->comp);
- INIT_LIST_HEAD(&ctx->mc_list);
+ /* So list_del() will work if we don't do ucma_finish_ctx() */
+ INIT_LIST_HEAD(&ctx->list);
ctx->file = file;
mutex_init(&ctx->mutex);
- if (xa_alloc(&ctx_table, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL))
- goto error;
-
- list_add_tail(&ctx->list, &file->ctx_list);
+ if (xa_alloc(&ctx_table, &ctx->id, NULL, xa_limit_32b, GFP_KERNEL)) {
+ kfree(ctx);
+ return NULL;
+ }
return ctx;
-
-error:
- kfree(ctx);
- return NULL;
}
-static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
+static void ucma_finish_ctx(struct ucma_context *ctx)
{
- struct ucma_multicast *mc;
-
- mc = kzalloc(sizeof(*mc), GFP_KERNEL);
- if (!mc)
- return NULL;
-
- mc->ctx = ctx;
- if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, GFP_KERNEL))
- goto error;
-
- list_add_tail(&mc->list, &ctx->mc_list);
- return mc;
-
-error:
- kfree(mc);
- return NULL;
+ lockdep_assert_held(&ctx->file->mut);
+ list_add_tail(&ctx->list, &ctx->file->ctx_list);
+ xa_store(&ctx_table, ctx->id, ctx, GFP_KERNEL);
}
static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
@@ -280,10 +254,15 @@ static void ucma_copy_ud_event(struct ib_device *device,
dst->qkey = src->qkey;
}
-static void ucma_set_event_context(struct ucma_context *ctx,
- struct rdma_cm_event *event,
- struct ucma_event *uevent)
+static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx,
+ struct rdma_cm_event *event)
{
+ struct ucma_event *uevent;
+
+ uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
+ if (!uevent)
+ return NULL;
+
uevent->ctx = ctx;
switch (event->event) {
case RDMA_CM_EVENT_MULTICAST_JOIN:
@@ -298,44 +277,56 @@ static void ucma_set_event_context(struct ucma_context *ctx,
uevent->resp.id = ctx->id;
break;
}
+ uevent->resp.event = event->event;
+ uevent->resp.status = event->status;
+ if (ctx->cm_id->qp_type == IB_QPT_UD)
+ ucma_copy_ud_event(ctx->cm_id->device, &uevent->resp.param.ud,
+ &event->param.ud);
+ else
+ ucma_copy_conn_event(&uevent->resp.param.conn,
+ &event->param.conn);
+
+ uevent->resp.ece.vendor_id = event->ece.vendor_id;
+ uevent->resp.ece.attr_mod = event->ece.attr_mod;
+ return uevent;
}
-/* Called with file->mut locked for the relevant context. */
-static void ucma_removal_event_handler(struct rdma_cm_id *cm_id)
+static int ucma_connect_event_handler(struct rdma_cm_id *cm_id,
+ struct rdma_cm_event *event)
{
- struct ucma_context *ctx = cm_id->context;
- struct ucma_event *con_req_eve;
- int event_found = 0;
+ struct ucma_context *listen_ctx = cm_id->context;
+ struct ucma_context *ctx;
+ struct ucma_event *uevent;
- if (ctx->destroying)
- return;
+ if (!atomic_add_unless(&listen_ctx->backlog, -1, 0))
+ return -ENOMEM;
+ ctx = ucma_alloc_ctx(listen_ctx->file);
+ if (!ctx)
+ goto err_backlog;
+ ctx->cm_id = cm_id;
- /* only if context is pointing to cm_id that it owns it and can be
- * queued to be closed, otherwise that cm_id is an inflight one that
- * is part of that context event list pending to be detached and
- * reattached to its new context as part of ucma_get_event,
- * handled separately below.
- */
- if (ctx->cm_id == cm_id) {
- xa_lock(&ctx_table);
- ctx->closing = 1;
- xa_unlock(&ctx_table);
- queue_work(ctx->file->close_wq, &ctx->close_work);
- return;
- }
+ uevent = ucma_create_uevent(listen_ctx, event);
+ if (!uevent)
+ goto err_alloc;
+ uevent->conn_req_ctx = ctx;
+ uevent->resp.id = ctx->id;
- list_for_each_entry(con_req_eve, &ctx->file->event_list, list) {
- if (con_req_eve->cm_id == cm_id &&
- con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
- list_del(&con_req_eve->list);
- INIT_WORK(&con_req_eve->close_work, ucma_close_event_id);
- queue_work(ctx->file->close_wq, &con_req_eve->close_work);
- event_found = 1;
- break;
- }
- }
- if (!event_found)
- pr_err("ucma_removal_event_handler: warning: connect request event wasn't found\n");
+ ctx->cm_id->context = ctx;
+
+ mutex_lock(&ctx->file->mut);
+ ucma_finish_ctx(ctx);
+ list_add_tail(&uevent->list, &ctx->file->event_list);
+ mutex_unlock(&ctx->file->mut);
+ wake_up_interruptible(&ctx->file->poll_wait);
+ return 0;
+
+err_alloc:
+ xa_erase(&ctx_table, ctx->id);
+ kfree(ctx);
+err_backlog:
+ atomic_inc(&listen_ctx->backlog);
+ /* Returning error causes the new ID to be destroyed */
+ return -ENOMEM;
}
static int ucma_event_handler(struct rdma_cm_id *cm_id,
@@ -343,66 +334,38 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
{
struct ucma_event *uevent;
struct ucma_context *ctx = cm_id->context;
- int ret = 0;
-
- uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
- if (!uevent)
- return event->event == RDMA_CM_EVENT_CONNECT_REQUEST;
- mutex_lock(&ctx->file->mut);
- uevent->cm_id = cm_id;
- ucma_set_event_context(ctx, event, uevent);
- uevent->resp.event = event->event;
- uevent->resp.status = event->status;
- if (cm_id->qp_type == IB_QPT_UD)
- ucma_copy_ud_event(cm_id->device, &uevent->resp.param.ud,
- &event->param.ud);
- else
- ucma_copy_conn_event(&uevent->resp.param.conn,
- &event->param.conn);
-
- uevent->resp.ece.vendor_id = event->ece.vendor_id;
- uevent->resp.ece.attr_mod = event->ece.attr_mod;
-
- if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
- if (!ctx->backlog) {
- ret = -ENOMEM;
- kfree(uevent);
- goto out;
- }
- ctx->backlog--;
- } else if (!ctx->uid || ctx->cm_id != cm_id) {
- /*
- * We ignore events for new connections until userspace has set
- * their context. This can only happen if an error occurs on a
- * new connection before the user accepts it. This is okay,
- * since the accept will just fail later. However, we do need
- * to release the underlying HW resources in case of a device
- * removal event.
- */
- if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
- ucma_removal_event_handler(cm_id);
+ if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
+ return ucma_connect_event_handler(cm_id, event);
- kfree(uevent);
- goto out;
+ /*
+ * We ignore events for new connections until userspace has set their
+ * context. This can only happen if an error occurs on a new connection
+ * before the user accepts it. This is okay, since the accept will just
+ * fail later. However, we do need to release the underlying HW
+ * resources in case of a device removal event.
+ */
+ if (ctx->uid) {
+ uevent = ucma_create_uevent(ctx, event);
+ if (!uevent)
+ return 0;
+
+ mutex_lock(&ctx->file->mut);
+ list_add_tail(&uevent->list, &ctx->file->event_list);
+ mutex_unlock(&ctx->file->mut);
+ wake_up_interruptible(&ctx->file->poll_wait);
}
- list_add_tail(&uevent->list, &ctx->file->event_list);
- wake_up_interruptible(&ctx->file->poll_wait);
- if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
- ucma_removal_event_handler(cm_id);
-out:
- mutex_unlock(&ctx->file->mut);
- return ret;
+ if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL && !ctx->destroying)
+ queue_work(system_unbound_wq, &ctx->close_work);
+ return 0;
}
static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
int in_len, int out_len)
{
- struct ucma_context *ctx;
struct rdma_ucm_get_event cmd;
struct ucma_event *uevent;
- int ret = 0;
/*
* Old 32 bit user space does not send the 4 byte padding in the
@@ -429,35 +392,25 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
mutex_lock(&file->mut);
}
- uevent = list_entry(file->event_list.next, struct ucma_event, list);
-
- if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
- ctx = ucma_alloc_ctx(file);
- if (!ctx) {
- ret = -ENOMEM;
- goto done;
- }
- uevent->ctx->backlog++;
- ctx->cm_id = uevent->cm_id;
- ctx->cm_id->context = ctx;
- uevent->resp.id = ctx->id;
- }
+ uevent = list_first_entry(&file->event_list, struct ucma_event, list);
if (copy_to_user(u64_to_user_ptr(cmd.response),
&uevent->resp,
min_t(size_t, out_len, sizeof(uevent->resp)))) {
- ret = -EFAULT;
- goto done;
+ mutex_unlock(&file->mut);
+ return -EFAULT;
}
list_del(&uevent->list);
uevent->ctx->events_reported++;
if (uevent->mc)
uevent->mc->events_reported++;
- kfree(uevent);
-done:
+ if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
+ atomic_inc(&uevent->ctx->backlog);
mutex_unlock(&file->mut);
- return ret;
+
+ kfree(uevent);
+ return 0;
}
static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type)
@@ -498,58 +451,60 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
if (ret)
return ret;
- mutex_lock(&file->mut);
ctx = ucma_alloc_ctx(file);
- mutex_unlock(&file->mut);
if (!ctx)
return -ENOMEM;
ctx->uid = cmd.uid;
- cm_id = __rdma_create_id(current->nsproxy->net_ns,
- ucma_event_handler, ctx, cmd.ps, qp_type, NULL);
+ cm_id = rdma_create_user_id(ucma_event_handler, ctx, cmd.ps, qp_type);
if (IS_ERR(cm_id)) {
ret = PTR_ERR(cm_id);
goto err1;
}
+ ctx->cm_id = cm_id;
resp.id = ctx->id;
if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp))) {
- ret = -EFAULT;
- goto err2;
+ xa_erase(&ctx_table, ctx->id);
+ __destroy_id(ctx);
+ return -EFAULT;
}
- ctx->cm_id = cm_id;
+ mutex_lock(&file->mut);
+ ucma_finish_ctx(ctx);
+ mutex_unlock(&file->mut);
return 0;
-err2:
- rdma_destroy_id(cm_id);
err1:
xa_erase(&ctx_table, ctx->id);
- mutex_lock(&file->mut);
- list_del(&ctx->list);
- mutex_unlock(&file->mut);
kfree(ctx);
return ret;
}
static void ucma_cleanup_multicast(struct ucma_context *ctx)
{
- struct ucma_multicast *mc, *tmp;
+ struct ucma_multicast *mc;
+ unsigned long index;
- mutex_lock(&ctx->file->mut);
- list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
- list_del(&mc->list);
- xa_erase(&multicast_table, mc->id);
+ xa_for_each(&multicast_table, index, mc) {
+ if (mc->ctx != ctx)
+ continue;
+ /*
+ * At this point mc->ctx->ref is 0 so the mc cannot leave the
+ * lock on the reader and this is enough serialization
+ */
+ xa_erase(&multicast_table, index);
kfree(mc);
}
- mutex_unlock(&ctx->file->mut);
}
static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
{
struct ucma_event *uevent, *tmp;
+ rdma_lock_handler(mc->ctx->cm_id);
+ mutex_lock(&mc->ctx->file->mut);
list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
if (uevent->mc != mc)
continue;
@@ -557,6 +512,8 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
list_del(&uevent->list);
kfree(uevent);
}
+ mutex_unlock(&mc->ctx->file->mut);
+ rdma_unlock_handler(mc->ctx->cm_id);
}
/*
@@ -564,10 +521,6 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
* this point, no new events will be reported from the hardware. However, we
* still need to cleanup the UCMA context for this ID. Specifically, there
* might be events that have not yet been consumed by the user space software.
- * These might include pending connect requests which we have not completed
- * processing. We cannot call rdma_destroy_id while holding the lock of the
- * context (file->mut), as it might cause a deadlock. We therefore extract all
- * relevant events from the context pending events list while holding the
* mutex. After that we release them as needed.
*/
static int ucma_free_ctx(struct ucma_context *ctx)
@@ -576,31 +529,57 @@ static int ucma_free_ctx(struct ucma_context *ctx)
struct ucma_event *uevent, *tmp;
LIST_HEAD(list);
-
ucma_cleanup_multicast(ctx);
/* Cleanup events not yet reported to the user. */
mutex_lock(&ctx->file->mut);
list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
- if (uevent->ctx == ctx)
+ if (uevent->ctx == ctx || uevent->conn_req_ctx == ctx)
list_move_tail(&uevent->list, &list);
}
list_del(&ctx->list);
+ events_reported = ctx->events_reported;
mutex_unlock(&ctx->file->mut);
+ /*
+ * If this was a listening ID then any connections spawned from it
+ * that have not been delivered to userspace are cleaned up too.
+ * Must be done outside any locks.
+ */
list_for_each_entry_safe(uevent, tmp, &list, list) {
list_del(&uevent->list);
- if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
- rdma_destroy_id(uevent->cm_id);
+ if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST &&
+ uevent->conn_req_ctx != ctx)
+ __destroy_id(uevent->conn_req_ctx);
kfree(uevent);
}
- events_reported = ctx->events_reported;
mutex_destroy(&ctx->mutex);
kfree(ctx);
return events_reported;
}
+static int __destroy_id(struct ucma_context *ctx)
+{
+ /*
+ * If the refcount is already 0 then ucma_close_id() has already
+ * destroyed the cm_id, otherwise holding the refcount keeps cm_id
+ * valid. Prevent queue_work() from being called.
+ */
+ if (refcount_inc_not_zero(&ctx->ref)) {
+ rdma_lock_handler(ctx->cm_id);
+ ctx->destroying = 1;
+ rdma_unlock_handler(ctx->cm_id);
+ ucma_put_ctx(ctx);
+ }
+
+ cancel_work_sync(&ctx->close_work);
+ /* At this point it's guaranteed that there is no inflight closing task */
+ if (ctx->cm_id)
+ ucma_close_id(&ctx->close_work);
+ return ucma_free_ctx(ctx);
+}
+
static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
int in_len, int out_len)
{
@@ -624,24 +603,7 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- mutex_lock(&ctx->file->mut);
- ctx->destroying = 1;
- mutex_unlock(&ctx->file->mut);
-
- flush_workqueue(ctx->file->close_wq);
- /* At this point it's guaranteed that there is no inflight
- * closing task */
- xa_lock(&ctx_table);
- if (!ctx->closing) {
- xa_unlock(&ctx_table);
- ucma_put_ctx(ctx);
- wait_for_completion(&ctx->comp);
- rdma_destroy_id(ctx->cm_id);
- } else {
- xa_unlock(&ctx_table);
- }
-
- resp.events_reported = ucma_free_ctx(ctx);
+ resp.events_reported = __destroy_id(ctx);
if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
@@ -1124,10 +1086,12 @@ static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ?
- cmd.backlog : max_backlog;
+ if (cmd.backlog <= 0 || cmd.backlog > max_backlog)
+ cmd.backlog = max_backlog;
+ atomic_set(&ctx->backlog, cmd.backlog);
+
mutex_lock(&ctx->mutex);
- ret = rdma_listen(ctx->cm_id, ctx->backlog);
+ ret = rdma_listen(ctx->cm_id, cmd.backlog);
mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
@@ -1160,16 +1124,20 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
if (cmd.conn_param.valid) {
ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
- mutex_lock(&file->mut);
mutex_lock(&ctx->mutex);
- ret = __rdma_accept_ece(ctx->cm_id, &conn_param, NULL, &ece);
- mutex_unlock(&ctx->mutex);
- if (!ret)
+ rdma_lock_handler(ctx->cm_id);
+ ret = rdma_accept_ece(ctx->cm_id, &conn_param, &ece);
+ if (!ret) {
+ /* The uid must be set atomically with the handler */
ctx->uid = cmd.uid;
- mutex_unlock(&file->mut);
+ }
+ rdma_unlock_handler(ctx->cm_id);
+ mutex_unlock(&ctx->mutex);
} else {
mutex_lock(&ctx->mutex);
- ret = __rdma_accept_ece(ctx->cm_id, NULL, NULL, &ece);
+ rdma_lock_handler(ctx->cm_id);
+ ret = rdma_accept_ece(ctx->cm_id, NULL, &ece);
+ rdma_unlock_handler(ctx->cm_id);
mutex_unlock(&ctx->mutex);
}
ucma_put_ctx(ctx);
@@ -1482,44 +1450,52 @@ static ssize_t ucma_process_join(struct ucma_file *file,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- mutex_lock(&file->mut);
- mc = ucma_alloc_multicast(ctx);
+ mc = kzalloc(sizeof(*mc), GFP_KERNEL);
if (!mc) {
ret = -ENOMEM;
- goto err1;
+ goto err_put_ctx;
}
+
+ mc->ctx = ctx;
mc->join_state = join_state;
mc->uid = cmd->uid;
memcpy(&mc->addr, addr, cmd->addr_size);
+
+ if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
+ GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto err_free_mc;
+ }
+
mutex_lock(&ctx->mutex);
ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
join_state, mc);
mutex_unlock(&ctx->mutex);
if (ret)
- goto err2;
+ goto err_xa_erase;
resp.id = mc->id;
if (copy_to_user(u64_to_user_ptr(cmd->response),
&resp, sizeof(resp))) {
ret = -EFAULT;
- goto err3;
+ goto err_leave_multicast;
}
xa_store(&multicast_table, mc->id, mc, 0);
- mutex_unlock(&file->mut);
ucma_put_ctx(ctx);
return 0;
-err3:
+err_leave_multicast:
+ mutex_lock(&ctx->mutex);
rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
+ mutex_unlock(&ctx->mutex);
ucma_cleanup_mc_events(mc);
-err2:
+err_xa_erase:
xa_erase(&multicast_table, mc->id);
- list_del(&mc->list);
+err_free_mc:
kfree(mc);
-err1:
- mutex_unlock(&file->mut);
+err_put_ctx:
ucma_put_ctx(ctx);
return ret;
}
@@ -1581,7 +1557,7 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
mc = xa_load(&multicast_table, cmd.id);
if (!mc)
mc = ERR_PTR(-ENOENT);
- else if (mc->ctx->file != file)
+ else if (READ_ONCE(mc->ctx->file) != file)
mc = ERR_PTR(-EINVAL);
else if (!refcount_inc_not_zero(&mc->ctx->ref))
mc = ERR_PTR(-ENXIO);
@@ -1598,10 +1574,7 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
mutex_unlock(&mc->ctx->mutex);
- mutex_lock(&mc->ctx->file->mut);
ucma_cleanup_mc_events(mc);
- list_del(&mc->list);
- mutex_unlock(&mc->ctx->file->mut);
ucma_put_ctx(mc->ctx);
resp.events_reported = mc->events_reported;
@@ -1614,45 +1587,15 @@ out:
return ret;
}
-static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2)
-{
- /* Acquire mutex's based on pointer comparison to prevent deadlock. */
- if (file1 < file2) {
- mutex_lock(&file1->mut);
- mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING);
- } else {
- mutex_lock(&file2->mut);
- mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING);
- }
-}
-
-static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2)
-{
- if (file1 < file2) {
- mutex_unlock(&file2->mut);
- mutex_unlock(&file1->mut);
- } else {
- mutex_unlock(&file1->mut);
- mutex_unlock(&file2->mut);
- }
-}
-
-static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file)
-{
- struct ucma_event *uevent, *tmp;
-
- list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list)
- if (uevent->ctx == ctx)
- list_move_tail(&uevent->list, &file->event_list);
-}
-
static ssize_t ucma_migrate_id(struct ucma_file *new_file,
const char __user *inbuf,
int in_len, int out_len)
{
struct rdma_ucm_migrate_id cmd;
struct rdma_ucm_migrate_resp resp;
+ struct ucma_event *uevent, *tmp;
struct ucma_context *ctx;
+ LIST_HEAD(event_list);
struct fd f;
struct ucma_file *cur_file;
int ret = 0;
@@ -1668,40 +1611,53 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,
ret = -EINVAL;
goto file_put;
}
+ cur_file = f.file->private_data;
/* Validate current fd and prevent destruction of id. */
- ctx = ucma_get_ctx(f.file->private_data, cmd.id);
+ ctx = ucma_get_ctx(cur_file, cmd.id);
if (IS_ERR(ctx)) {
ret = PTR_ERR(ctx);
goto file_put;
}
- cur_file = ctx->file;
- if (cur_file == new_file) {
- resp.events_reported = ctx->events_reported;
- goto response;
- }
-
+ rdma_lock_handler(ctx->cm_id);
/*
- * Migrate events between fd's, maintaining order, and avoiding new
- * events being added before existing events.
+ * ctx->file can only be changed under the handler & xa_lock. xa_load()
+ * must be checked again to ensure the ctx hasn't begun destruction
+ * since the ucma_get_ctx().
*/
- ucma_lock_files(cur_file, new_file);
xa_lock(&ctx_table);
-
- list_move_tail(&ctx->list, &new_file->ctx_list);
- ucma_move_events(ctx, new_file);
+ if (_ucma_find_context(cmd.id, cur_file) != ctx) {
+ xa_unlock(&ctx_table);
+ ret = -ENOENT;
+ goto err_unlock;
+ }
ctx->file = new_file;
+ xa_unlock(&ctx_table);
+
+ mutex_lock(&cur_file->mut);
+ list_del(&ctx->list);
+ /*
+ * At this point lock_handler() prevents addition of new uevents for
+ * this ctx.
+ */
+ list_for_each_entry_safe(uevent, tmp, &cur_file->event_list, list)
+ if (uevent->ctx == ctx)
+ list_move_tail(&uevent->list, &event_list);
resp.events_reported = ctx->events_reported;
+ mutex_unlock(&cur_file->mut);
- xa_unlock(&ctx_table);
- ucma_unlock_files(cur_file, new_file);
+ mutex_lock(&new_file->mut);
+ list_add_tail(&ctx->list, &new_file->ctx_list);
+ list_splice_tail(&event_list, &new_file->event_list);
+ mutex_unlock(&new_file->mut);
-response:
if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
+err_unlock:
+ rdma_unlock_handler(ctx->cm_id);
ucma_put_ctx(ctx);
file_put:
fdput(f);
@@ -1801,13 +1757,6 @@ static int ucma_open(struct inode *inode, struct file *filp)
if (!file)
return -ENOMEM;
- file->close_wq = alloc_ordered_workqueue("ucma_close_id",
- WQ_MEM_RECLAIM);
- if (!file->close_wq) {
- kfree(file);
- return -ENOMEM;
- }
-
INIT_LIST_HEAD(&file->event_list);
INIT_LIST_HEAD(&file->ctx_list);
init_waitqueue_head(&file->poll_wait);
@@ -1822,37 +1771,22 @@ static int ucma_open(struct inode *inode, struct file *filp)
static int ucma_close(struct inode *inode, struct file *filp)
{
struct ucma_file *file = filp->private_data;
- struct ucma_context *ctx, *tmp;
- mutex_lock(&file->mut);
- list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) {
- ctx->destroying = 1;
- mutex_unlock(&file->mut);
+ /*
+ * All paths that touch ctx_list or ctx_list starting from write() are
+ * prevented by this being a FD release function. The list_add_tail() in
+ * ucma_connect_event_handler() can run concurrently, however it only
+ * adds to the list *after* a listening ID. By only reading the first of
+ * the list, and relying on __destroy_id() to block
+ * ucma_connect_event_handler(), no additional locking is needed.
+ */
+ while (!list_empty(&file->ctx_list)) {
+ struct ucma_context *ctx = list_first_entry(
+ &file->ctx_list, struct ucma_context, list);
xa_erase(&ctx_table, ctx->id);
- flush_workqueue(file->close_wq);
- /* At that step once ctx was marked as destroying and workqueue
- * was flushed we are safe from any inflights handlers that
- * might put other closing task.
- */
- xa_lock(&ctx_table);
- if (!ctx->closing) {
- xa_unlock(&ctx_table);
- ucma_put_ctx(ctx);
- wait_for_completion(&ctx->comp);
- /* rdma_destroy_id ensures that no event handlers are
- * inflight for that id before releasing it.
- */
- rdma_destroy_id(ctx->cm_id);
- } else {
- xa_unlock(&ctx_table);
- }
-
- ucma_free_ctx(ctx);
- mutex_lock(&file->mut);
+ __destroy_id(ctx);
}
- mutex_unlock(&file->mut);
- destroy_workqueue(file->close_wq);
kfree(file);
return 0;
}
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 831bff8d52e5..e9fecbdf391b 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -39,6 +39,7 @@
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
+#include <linux/count_zeros.h>
#include <rdma/ib_umem_odp.h>
#include "uverbs.h"
@@ -60,73 +61,6 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
sg_free_table(&umem->sg_head);
}
-/* ib_umem_add_sg_table - Add N contiguous pages to scatter table
- *
- * sg: current scatterlist entry
- * page_list: array of npage struct page pointers
- * npages: number of pages in page_list
- * max_seg_sz: maximum segment size in bytes
- * nents: [out] number of entries in the scatterlist
- *
- * Return new end of scatterlist
- */
-static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg,
- struct page **page_list,
- unsigned long npages,
- unsigned int max_seg_sz,
- int *nents)
-{
- unsigned long first_pfn;
- unsigned long i = 0;
- bool update_cur_sg = false;
- bool first = !sg_page(sg);
-
- /* Check if new page_list is contiguous with end of previous page_list.
- * sg->length here is a multiple of PAGE_SIZE and sg->offset is 0.
- */
- if (!first && (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) ==
- page_to_pfn(page_list[0])))
- update_cur_sg = true;
-
- while (i != npages) {
- unsigned long len;
- struct page *first_page = page_list[i];
-
- first_pfn = page_to_pfn(first_page);
-
- /* Compute the number of contiguous pages we have starting
- * at i
- */
- for (len = 0; i != npages &&
- first_pfn + len == page_to_pfn(page_list[i]) &&
- len < (max_seg_sz >> PAGE_SHIFT);
- len++)
- i++;
-
- /* Squash N contiguous pages from page_list into current sge */
- if (update_cur_sg) {
- if ((max_seg_sz - sg->length) >= (len << PAGE_SHIFT)) {
- sg_set_page(sg, sg_page(sg),
- sg->length + (len << PAGE_SHIFT),
- 0);
- update_cur_sg = false;
- continue;
- }
- update_cur_sg = false;
- }
-
- /* Squash N contiguous pages into next sge or first sge */
- if (!first)
- sg = sg_next(sg);
-
- (*nents)++;
- sg_set_page(sg, first_page, len << PAGE_SHIFT, 0);
- first = false;
- }
-
- return sg;
-}
-
/**
* ib_umem_find_best_pgsz - Find best HW page size to use for this MR
*
@@ -146,18 +80,28 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
unsigned long virt)
{
struct scatterlist *sg;
- unsigned int best_pg_bit;
unsigned long va, pgoff;
dma_addr_t mask;
int i;
+ /* rdma_for_each_block() has a bug if the page size is smaller than the
+ * page size used to build the umem. For now prevent smaller page sizes
+ * from being returned.
+ */
+ pgsz_bitmap &= GENMASK(BITS_PER_LONG - 1, PAGE_SHIFT);
+
/* At minimum, drivers must support PAGE_SIZE or smaller */
if (WARN_ON(!(pgsz_bitmap & GENMASK(PAGE_SHIFT, 0))))
return 0;
- va = virt;
- /* max page size not to exceed MR length */
- mask = roundup_pow_of_two(umem->length);
+ umem->iova = va = virt;
+ /* The best result is the smallest page size that results in the minimum
+ * number of required pages. Compute the largest page size that could
+ * work based on VA address bits that don't change.
+ */
+ mask = pgsz_bitmap &
+ GENMASK(BITS_PER_LONG - 1,
+ bits_per((umem->length - 1 + virt) ^ virt));
/* offset into first SGL */
pgoff = umem->address & ~PAGE_MASK;
@@ -175,9 +119,14 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
mask |= va;
pgoff = 0;
}
- best_pg_bit = rdma_find_pg_bit(mask, pgsz_bitmap);
- return BIT_ULL(best_pg_bit);
+ /* The mask accumulates 1's in each position where the VA and physical
+ * address differ, thus the length of trailing 0 is the largest page
+ * size that can pass the VA through to the physical.
+ */
+ if (mask)
+ pgsz_bitmap &= GENMASK(count_trailing_zeros(mask), 0);
+ return rounddown_pow_of_two(pgsz_bitmap);
}
EXPORT_SYMBOL(ib_umem_find_best_pgsz);
@@ -201,7 +150,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
struct mm_struct *mm;
unsigned long npages;
int ret;
- struct scatterlist *sg;
+ struct scatterlist *sg = NULL;
unsigned int gup_flags = FOLL_WRITE;
/*
@@ -224,6 +173,11 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
umem->ibdev = device;
umem->length = size;
umem->address = addr;
+ /*
+ * Drivers should call ib_umem_find_best_pgsz() to set the iova
+ * correctly.
+ */
+ umem->iova = addr;
umem->writable = ib_access_writable(access);
umem->owning_mm = mm = current->mm;
mmgrab(mm);
@@ -251,15 +205,9 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
cur_base = addr & PAGE_MASK;
- ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
- if (ret)
- goto vma;
-
if (!umem->writable)
gup_flags |= FOLL_FORCE;
- sg = umem->sg_head.sgl;
-
while (npages) {
cond_resched();
ret = pin_user_pages_fast(cur_base,
@@ -271,15 +219,19 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
goto umem_release;
cur_base += ret * PAGE_SIZE;
- npages -= ret;
-
- sg = ib_umem_add_sg_table(sg, page_list, ret,
- dma_get_max_seg_size(device->dma_device),
- &umem->sg_nents);
+ npages -= ret;
+ sg = __sg_alloc_table_from_pages(
+ &umem->sg_head, page_list, ret, 0, ret << PAGE_SHIFT,
+ dma_get_max_seg_size(device->dma_device), sg, npages,
+ GFP_KERNEL);
+ umem->sg_nents = umem->sg_head.nents;
+ if (IS_ERR(sg)) {
+ unpin_user_pages_dirty_lock(page_list, ret, 0);
+ ret = PTR_ERR(sg);
+ goto umem_release;
+ }
}
- sg_mark_end(sg);
-
if (access & IB_ACCESS_RELAXED_ORDERING)
dma_attr |= DMA_ATTR_WEAK_ORDERING;
@@ -297,7 +249,6 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
umem_release:
__ib_umem_release(device, umem, 0);
-vma:
atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
out:
free_page((unsigned long) page_list);
@@ -329,18 +280,6 @@ void ib_umem_release(struct ib_umem *umem)
}
EXPORT_SYMBOL(ib_umem_release);
-int ib_umem_page_count(struct ib_umem *umem)
-{
- int i, n = 0;
- struct scatterlist *sg;
-
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
- n += sg_dma_len(sg) >> PAGE_SHIFT;
-
- return n;
-}
-EXPORT_SYMBOL(ib_umem_page_count);
-
/*
* Copy from the given ib_umem's pages to the given buffer.
*
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index cc6b4befde7c..323f6cf00682 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -40,6 +40,7 @@
#include <linux/vmalloc.h>
#include <linux/hugetlb.h>
#include <linux/interval_tree.h>
+#include <linux/hmm.h>
#include <linux/pagemap.h>
#include <rdma/ib_verbs.h>
@@ -60,7 +61,7 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
size_t page_size = 1UL << umem_odp->page_shift;
unsigned long start;
unsigned long end;
- size_t pages;
+ size_t ndmas, npfns;
start = ALIGN_DOWN(umem_odp->umem.address, page_size);
if (check_add_overflow(umem_odp->umem.address,
@@ -71,20 +72,21 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
if (unlikely(end < page_size))
return -EOVERFLOW;
- pages = (end - start) >> umem_odp->page_shift;
- if (!pages)
+ ndmas = (end - start) >> umem_odp->page_shift;
+ if (!ndmas)
return -EINVAL;
- umem_odp->page_list = kvcalloc(
- pages, sizeof(*umem_odp->page_list), GFP_KERNEL);
- if (!umem_odp->page_list)
+ npfns = (end - start) >> PAGE_SHIFT;
+ umem_odp->pfn_list = kvcalloc(
+ npfns, sizeof(*umem_odp->pfn_list), GFP_KERNEL);
+ if (!umem_odp->pfn_list)
return -ENOMEM;
umem_odp->dma_list = kvcalloc(
- pages, sizeof(*umem_odp->dma_list), GFP_KERNEL);
+ ndmas, sizeof(*umem_odp->dma_list), GFP_KERNEL);
if (!umem_odp->dma_list) {
ret = -ENOMEM;
- goto out_page_list;
+ goto out_pfn_list;
}
ret = mmu_interval_notifier_insert(&umem_odp->notifier,
@@ -98,8 +100,8 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
out_dma_list:
kvfree(umem_odp->dma_list);
-out_page_list:
- kvfree(umem_odp->page_list);
+out_pfn_list:
+ kvfree(umem_odp->pfn_list);
return ret;
}
@@ -276,7 +278,7 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
mutex_unlock(&umem_odp->umem_mutex);
mmu_interval_notifier_remove(&umem_odp->notifier);
kvfree(umem_odp->dma_list);
- kvfree(umem_odp->page_list);
+ kvfree(umem_odp->pfn_list);
}
put_pid(umem_odp->tgid);
kfree(umem_odp);
@@ -287,87 +289,56 @@ EXPORT_SYMBOL(ib_umem_odp_release);
* Map for DMA and insert a single page into the on-demand paging page tables.
*
* @umem: the umem to insert the page to.
- * @page_index: index in the umem to add the page to.
+ * @dma_index: index in the umem to add the dma to.
* @page: the page struct to map and add.
* @access_mask: access permissions needed for this page.
* @current_seq: sequence number for synchronization with invalidations.
* the sequence number is taken from
* umem_odp->notifiers_seq.
*
- * The function returns -EFAULT if the DMA mapping operation fails. It returns
- * -EAGAIN if a concurrent invalidation prevents us from updating the page.
+ * The function returns -EFAULT if the DMA mapping operation fails.
*
- * The page is released via put_page even if the operation failed. For on-demand
- * pinning, the page is released whenever it isn't stored in the umem.
*/
static int ib_umem_odp_map_dma_single_page(
struct ib_umem_odp *umem_odp,
- unsigned int page_index,
+ unsigned int dma_index,
struct page *page,
- u64 access_mask,
- unsigned long current_seq)
+ u64 access_mask)
{
struct ib_device *dev = umem_odp->umem.ibdev;
- dma_addr_t dma_addr;
- int ret = 0;
+ dma_addr_t *dma_addr = &umem_odp->dma_list[dma_index];
- if (mmu_interval_check_retry(&umem_odp->notifier, current_seq)) {
- ret = -EAGAIN;
- goto out;
- }
- if (!(umem_odp->dma_list[page_index])) {
- dma_addr =
- ib_dma_map_page(dev, page, 0, BIT(umem_odp->page_shift),
- DMA_BIDIRECTIONAL);
- if (ib_dma_mapping_error(dev, dma_addr)) {
- ret = -EFAULT;
- goto out;
- }
- umem_odp->dma_list[page_index] = dma_addr | access_mask;
- umem_odp->page_list[page_index] = page;
- umem_odp->npages++;
- } else if (umem_odp->page_list[page_index] == page) {
- umem_odp->dma_list[page_index] |= access_mask;
- } else {
+ if (*dma_addr) {
/*
- * This is a race here where we could have done:
- *
- * CPU0 CPU1
- * get_user_pages()
- * invalidate()
- * page_fault()
- * mutex_lock(umem_mutex)
- * page from GUP != page in ODP
- *
- * It should be prevented by the retry test above as reading
- * the seq number should be reliable under the
- * umem_mutex. Thus something is really not working right if
- * things get here.
+ * If the page is already dma mapped it means it went through
+ * a non-invalidating trasition, like read-only to writable.
+ * Resync the flags.
*/
- WARN(true,
- "Got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n",
- umem_odp->page_list[page_index], page);
- ret = -EAGAIN;
+ *dma_addr = (*dma_addr & ODP_DMA_ADDR_MASK) | access_mask;
+ return 0;
}
-out:
- put_page(page);
- return ret;
+ *dma_addr = ib_dma_map_page(dev, page, 0, 1 << umem_odp->page_shift,
+ DMA_BIDIRECTIONAL);
+ if (ib_dma_mapping_error(dev, *dma_addr)) {
+ *dma_addr = 0;
+ return -EFAULT;
+ }
+ umem_odp->npages++;
+ *dma_addr |= access_mask;
+ return 0;
}
/**
- * ib_umem_odp_map_dma_pages - Pin and DMA map userspace memory in an ODP MR.
+ * ib_umem_odp_map_dma_and_lock - DMA map userspace memory in an ODP MR and lock it.
*
- * Pins the range of pages passed in the argument, and maps them to
- * DMA addresses. The DMA addresses of the mapped pages is updated in
- * umem_odp->dma_list.
+ * Maps the range passed in the argument to DMA addresses.
+ * The DMA addresses of the mapped pages is updated in umem_odp->dma_list.
+ * Upon success the ODP MR will be locked to let caller complete its device
+ * page table update.
*
* Returns the number of pages mapped in success, negative error code
* for failure.
- * An -EAGAIN error code is returned when a concurrent mmu notifier prevents
- * the function from completing its task.
- * An -ENOENT error code indicates that userspace process is being terminated
- * and mm was already destroyed.
* @umem_odp: the umem to map and pin
* @user_virt: the address from which we need to map.
* @bcnt: the minimal number of bytes to pin and map. The mapping might be
@@ -376,21 +347,19 @@ out:
* the return value.
* @access_mask: bit mask of the requested access permissions for the given
* range.
- * @current_seq: the MMU notifiers sequance value for synchronization with
- * invalidations. the sequance number is read from
- * umem_odp->notifiers_seq before calling this function
+ * @fault: is faulting required for the given range
*/
-int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
- u64 bcnt, u64 access_mask,
- unsigned long current_seq)
+int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 user_virt,
+ u64 bcnt, u64 access_mask, bool fault)
+ __acquires(&umem_odp->umem_mutex)
{
struct task_struct *owning_process = NULL;
struct mm_struct *owning_mm = umem_odp->umem.owning_mm;
- struct page **local_page_list = NULL;
- u64 page_mask, off;
- int j, k, ret = 0, start_idx, npages = 0;
- unsigned int flags = 0, page_shift;
- phys_addr_t p = 0;
+ int pfn_index, dma_index, ret = 0, start_idx;
+ unsigned int page_shift, hmm_order, pfn_start_idx;
+ unsigned long num_pfns, current_seq;
+ struct hmm_range range = {};
+ unsigned long timeout;
if (access_mask == 0)
return -EINVAL;
@@ -399,15 +368,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
user_virt + bcnt > ib_umem_end(umem_odp))
return -EFAULT;
- local_page_list = (struct page **)__get_free_page(GFP_KERNEL);
- if (!local_page_list)
- return -ENOMEM;
-
page_shift = umem_odp->page_shift;
- page_mask = ~(BIT(page_shift) - 1);
- off = user_virt & (~page_mask);
- user_virt = user_virt & page_mask;
- bcnt += off; /* Charge for the first page offset as well. */
/*
* owning_process is allowed to be NULL, this means somehow the mm is
@@ -420,99 +381,104 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
goto out_put_task;
}
- if (access_mask & ODP_WRITE_ALLOWED_BIT)
- flags |= FOLL_WRITE;
+ range.notifier = &umem_odp->notifier;
+ range.start = ALIGN_DOWN(user_virt, 1UL << page_shift);
+ range.end = ALIGN(user_virt + bcnt, 1UL << page_shift);
+ pfn_start_idx = (range.start - ib_umem_start(umem_odp)) >> PAGE_SHIFT;
+ num_pfns = (range.end - range.start) >> PAGE_SHIFT;
+ if (fault) {
+ range.default_flags = HMM_PFN_REQ_FAULT;
- start_idx = (user_virt - ib_umem_start(umem_odp)) >> page_shift;
- k = start_idx;
+ if (access_mask & ODP_WRITE_ALLOWED_BIT)
+ range.default_flags |= HMM_PFN_REQ_WRITE;
+ }
- while (bcnt > 0) {
- const size_t gup_num_pages = min_t(size_t,
- ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE,
- PAGE_SIZE / sizeof(struct page *));
+ range.hmm_pfns = &(umem_odp->pfn_list[pfn_start_idx]);
+ timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
- mmap_read_lock(owning_mm);
- /*
- * Note: this might result in redundent page getting. We can
- * avoid this by checking dma_list to be 0 before calling
- * get_user_pages. However, this make the code much more
- * complex (and doesn't gain us much performance in most use
- * cases).
- */
- npages = get_user_pages_remote(owning_mm,
- user_virt, gup_num_pages,
- flags, local_page_list, NULL, NULL);
- mmap_read_unlock(owning_mm);
-
- if (npages < 0) {
- if (npages != -EAGAIN)
- pr_warn("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
- else
- pr_debug("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
- break;
- }
+retry:
+ current_seq = range.notifier_seq =
+ mmu_interval_read_begin(&umem_odp->notifier);
- bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
- mutex_lock(&umem_odp->umem_mutex);
- for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) {
- if (user_virt & ~page_mask) {
- p += PAGE_SIZE;
- if (page_to_phys(local_page_list[j]) != p) {
- ret = -EFAULT;
- break;
- }
- put_page(local_page_list[j]);
- continue;
- }
+ mmap_read_lock(owning_mm);
+ ret = hmm_range_fault(&range);
+ mmap_read_unlock(owning_mm);
+ if (unlikely(ret)) {
+ if (ret == -EBUSY && !time_after(jiffies, timeout))
+ goto retry;
+ goto out_put_mm;
+ }
- ret = ib_umem_odp_map_dma_single_page(
- umem_odp, k, local_page_list[j],
- access_mask, current_seq);
- if (ret < 0) {
- if (ret != -EAGAIN)
- pr_warn("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
- else
- pr_debug("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
- break;
- }
+ start_idx = (range.start - ib_umem_start(umem_odp)) >> page_shift;
+ dma_index = start_idx;
- p = page_to_phys(local_page_list[j]);
- k++;
- }
+ mutex_lock(&umem_odp->umem_mutex);
+ if (mmu_interval_read_retry(&umem_odp->notifier, current_seq)) {
mutex_unlock(&umem_odp->umem_mutex);
+ goto retry;
+ }
- if (ret < 0) {
+ for (pfn_index = 0; pfn_index < num_pfns;
+ pfn_index += 1 << (page_shift - PAGE_SHIFT), dma_index++) {
+
+ if (fault) {
/*
- * Release pages, remembering that the first page
- * to hit an error was already released by
- * ib_umem_odp_map_dma_single_page().
+ * Since we asked for hmm_range_fault() to populate
+ * pages it shouldn't return an error entry on success.
*/
- if (npages - (j + 1) > 0)
- release_pages(&local_page_list[j+1],
- npages - (j + 1));
+ WARN_ON(range.hmm_pfns[pfn_index] & HMM_PFN_ERROR);
+ WARN_ON(!(range.hmm_pfns[pfn_index] & HMM_PFN_VALID));
+ } else {
+ if (!(range.hmm_pfns[pfn_index] & HMM_PFN_VALID)) {
+ WARN_ON(umem_odp->dma_list[dma_index]);
+ continue;
+ }
+ access_mask = ODP_READ_ALLOWED_BIT;
+ if (range.hmm_pfns[pfn_index] & HMM_PFN_WRITE)
+ access_mask |= ODP_WRITE_ALLOWED_BIT;
+ }
+
+ hmm_order = hmm_pfn_to_map_order(range.hmm_pfns[pfn_index]);
+ /* If a hugepage was detected and ODP wasn't set for, the umem
+ * page_shift will be used, the opposite case is an error.
+ */
+ if (hmm_order + PAGE_SHIFT < page_shift) {
+ ret = -EINVAL;
+ ibdev_dbg(umem_odp->umem.ibdev,
+ "%s: un-expected hmm_order %d, page_shift %d\n",
+ __func__, hmm_order, page_shift);
break;
}
- }
- if (ret >= 0) {
- if (npages < 0 && k == start_idx)
- ret = npages;
- else
- ret = k - start_idx;
+ ret = ib_umem_odp_map_dma_single_page(
+ umem_odp, dma_index, hmm_pfn_to_page(range.hmm_pfns[pfn_index]),
+ access_mask);
+ if (ret < 0) {
+ ibdev_dbg(umem_odp->umem.ibdev,
+ "ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
+ break;
+ }
}
+ /* upon sucesss lock should stay on hold for the callee */
+ if (!ret)
+ ret = dma_index - start_idx;
+ else
+ mutex_unlock(&umem_odp->umem_mutex);
+out_put_mm:
mmput(owning_mm);
out_put_task:
if (owning_process)
put_task_struct(owning_process);
- free_page((unsigned long)local_page_list);
return ret;
}
-EXPORT_SYMBOL(ib_umem_odp_map_dma_pages);
+EXPORT_SYMBOL(ib_umem_odp_map_dma_and_lock);
void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
u64 bound)
{
+ dma_addr_t dma_addr;
+ dma_addr_t dma;
int idx;
u64 addr;
struct ib_device *dev = umem_odp->umem.ibdev;
@@ -521,20 +487,16 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
virt = max_t(u64, virt, ib_umem_start(umem_odp));
bound = min_t(u64, bound, ib_umem_end(umem_odp));
- /* Note that during the run of this function, the
- * notifiers_count of the MR is > 0, preventing any racing
- * faults from completion. We might be racing with other
- * invalidations, so we must make sure we free each page only
- * once. */
for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) {
idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
- if (umem_odp->page_list[idx]) {
- struct page *page = umem_odp->page_list[idx];
- dma_addr_t dma = umem_odp->dma_list[idx];
- dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK;
+ dma = umem_odp->dma_list[idx];
- WARN_ON(!dma_addr);
+ /* The access flags guaranteed a valid DMA address in case was NULL */
+ if (dma) {
+ unsigned long pfn_idx = (addr - ib_umem_start(umem_odp)) >> PAGE_SHIFT;
+ struct page *page = hmm_pfn_to_page(umem_odp->pfn_list[pfn_idx]);
+ dma_addr = dma & ODP_DMA_ADDR_MASK;
ib_dma_unmap_page(dev, dma_addr,
BIT(umem_odp->page_shift),
DMA_BIDIRECTIONAL);
@@ -551,7 +513,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
*/
set_page_dirty(head_page);
}
- umem_odp->page_list[idx] = NULL;
umem_odp->dma_list[idx] = 0;
umem_odp->npages--;
}
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 2fbc583d5bdd..418d133a8fb0 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -218,10 +218,12 @@ int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs)
if (!ucontext)
return -ENOMEM;
- ucontext->res.type = RDMA_RESTRACK_CTX;
ucontext->device = ib_dev;
ucontext->ufile = ufile;
xa_init_flags(&ucontext->mmap_xa, XA_FLAGS_ALLOC);
+
+ rdma_restrack_new(&ucontext->res, RDMA_RESTRACK_CTX);
+ rdma_restrack_set_name(&ucontext->res, NULL);
attrs->context = ucontext;
return 0;
}
@@ -250,7 +252,7 @@ int ib_init_ucontext(struct uverbs_attr_bundle *attrs)
if (ret)
goto err_uncharge;
- rdma_restrack_uadd(&ucontext->res);
+ rdma_restrack_add(&ucontext->res);
/*
* Make sure that ib_uverbs_get_ucontext() sees the pointer update
@@ -313,6 +315,7 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
err_uobj:
rdma_alloc_abort_uobject(uobj, attrs, false);
err_ucontext:
+ rdma_restrack_put(&attrs->context->res);
kfree(attrs->context);
attrs->context = NULL;
return ret;
@@ -439,12 +442,14 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
pd->device = ib_dev;
pd->uobject = uobj;
atomic_set(&pd->usecnt, 0);
- pd->res.type = RDMA_RESTRACK_PD;
+
+ rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD);
+ rdma_restrack_set_name(&pd->res, NULL);
ret = ib_dev->ops.alloc_pd(pd, &attrs->driver_udata);
if (ret)
goto err_alloc;
- rdma_restrack_uadd(&pd->res);
+ rdma_restrack_add(&pd->res);
uobj->object = pd;
uobj_finalize_uobj_create(uobj, attrs);
@@ -453,6 +458,7 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
return uverbs_response(attrs, &resp, sizeof(resp));
err_alloc:
+ rdma_restrack_put(&pd->res);
kfree(pd);
err:
uobj_alloc_abort(uobj, attrs);
@@ -742,9 +748,11 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
mr->sig_attrs = NULL;
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
- mr->res.type = RDMA_RESTRACK_MR;
mr->iova = cmd.hca_va;
- rdma_restrack_uadd(&mr->res);
+
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_set_name(&mr->res, NULL);
+ rdma_restrack_add(&mr->res);
uobj->object = mr;
uobj_put_obj_read(pd);
@@ -858,7 +866,7 @@ static int ib_uverbs_dereg_mr(struct uverbs_attr_bundle *attrs)
static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_alloc_mw cmd;
- struct ib_uverbs_alloc_mw_resp resp;
+ struct ib_uverbs_alloc_mw_resp resp = {};
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_mw *mw;
@@ -884,15 +892,21 @@ static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
goto err_put;
}
- mw = pd->device->ops.alloc_mw(pd, cmd.mw_type, &attrs->driver_udata);
- if (IS_ERR(mw)) {
- ret = PTR_ERR(mw);
+ mw = rdma_zalloc_drv_obj(ib_dev, ib_mw);
+ if (!mw) {
+ ret = -ENOMEM;
goto err_put;
}
- mw->device = pd->device;
- mw->pd = pd;
+ mw->device = ib_dev;
+ mw->pd = pd;
mw->uobject = uobj;
+ mw->type = cmd.mw_type;
+
+ ret = pd->device->ops.alloc_mw(mw, &attrs->driver_udata);
+ if (ret)
+ goto err_alloc;
+
atomic_inc(&pd->usecnt);
uobj->object = mw;
@@ -903,6 +917,8 @@ static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
resp.mw_handle = uobj->id;
return uverbs_response(attrs, &resp, sizeof(resp));
+err_alloc:
+ kfree(mw);
err_put:
uobj_put_obj_read(pd);
err_free:
@@ -994,12 +1010,14 @@ static int create_cq(struct uverbs_attr_bundle *attrs,
cq->event_handler = ib_uverbs_cq_event_handler;
cq->cq_context = ev_file ? &ev_file->ev_queue : NULL;
atomic_set(&cq->usecnt, 0);
- cq->res.type = RDMA_RESTRACK_CQ;
+
+ rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+ rdma_restrack_set_name(&cq->res, NULL);
ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata);
if (ret)
goto err_free;
- rdma_restrack_uadd(&cq->res);
+ rdma_restrack_add(&cq->res);
obj->uevent.uobject.object = cq;
obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file);
@@ -1013,6 +1031,7 @@ static int create_cq(struct uverbs_attr_bundle *attrs,
return uverbs_response(attrs, &resp, sizeof(resp));
err_free:
+ rdma_restrack_put(&cq->res);
kfree(cq);
err_file:
if (ev_file)
@@ -1237,8 +1256,21 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
bool has_sq = true;
struct ib_device *ib_dev;
- if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
- return -EPERM;
+ switch (cmd->qp_type) {
+ case IB_QPT_RAW_PACKET:
+ if (!capable(CAP_NET_RAW))
+ return -EPERM;
+ break;
+ case IB_QPT_RC:
+ case IB_QPT_UC:
+ case IB_QPT_UD:
+ case IB_QPT_XRC_INI:
+ case IB_QPT_XRC_TGT:
+ case IB_QPT_DRIVER:
+ break;
+ default:
+ return -EINVAL;
+ }
obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, attrs,
&ib_dev);
@@ -2985,11 +3017,11 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_ex_create_rwq_ind_table cmd;
struct ib_uverbs_ex_create_rwq_ind_table_resp resp = {};
- struct ib_uobject *uobj;
+ struct ib_uobject *uobj;
int err;
struct ib_rwq_ind_table_init_attr init_attr = {};
struct ib_rwq_ind_table *rwq_ind_tbl;
- struct ib_wq **wqs = NULL;
+ struct ib_wq **wqs = NULL;
u32 *wqs_handles = NULL;
struct ib_wq *wq = NULL;
int i, num_read_wqs;
@@ -3047,17 +3079,15 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
goto put_wqs;
}
- init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
- init_attr.ind_tbl = wqs;
-
- rwq_ind_tbl = ib_dev->ops.create_rwq_ind_table(ib_dev, &init_attr,
- &attrs->driver_udata);
-
- if (IS_ERR(rwq_ind_tbl)) {
- err = PTR_ERR(rwq_ind_tbl);
+ rwq_ind_tbl = rdma_zalloc_drv_obj(ib_dev, ib_rwq_ind_table);
+ if (!rwq_ind_tbl) {
+ err = -ENOMEM;
goto err_uobj;
}
+ init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
+ init_attr.ind_tbl = wqs;
+
rwq_ind_tbl->ind_tbl = wqs;
rwq_ind_tbl->log_ind_tbl_size = init_attr.log_ind_tbl_size;
rwq_ind_tbl->uobject = uobj;
@@ -3065,6 +3095,11 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
rwq_ind_tbl->device = ib_dev;
atomic_set(&rwq_ind_tbl->usecnt, 0);
+ err = ib_dev->ops.create_rwq_ind_table(rwq_ind_tbl, &init_attr,
+ &attrs->driver_udata);
+ if (err)
+ goto err_create;
+
for (i = 0; i < num_wq_handles; i++)
rdma_lookup_put_uobject(&wqs[i]->uobject->uevent.uobject,
UVERBS_LOOKUP_READ);
@@ -3076,6 +3111,8 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
resp.response_length = uverbs_response_length(attrs, sizeof(resp));
return uverbs_response(attrs, &resp, sizeof(resp));
+err_create:
+ kfree(rwq_ind_tbl);
err_uobj:
uobj_alloc_abort(uobj, attrs);
put_wqs:
@@ -3232,8 +3269,8 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
goto err_free;
}
- flow_id = qp->device->ops.create_flow(
- qp, flow_attr, IB_FLOW_DOMAIN_USER, &attrs->driver_udata);
+ flow_id = qp->device->ops.create_flow(qp, flow_attr,
+ &attrs->driver_udata);
if (IS_ERR(flow_id)) {
err = PTR_ERR(flow_id);
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 37794d88b1f3..4bb7c642f80c 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -108,8 +108,11 @@ int uverbs_dealloc_mw(struct ib_mw *mw)
int ret;
ret = mw->device->ops.dealloc_mw(mw);
- if (!ret)
- atomic_dec(&pd->usecnt);
+ if (ret)
+ return ret;
+
+ atomic_dec(&pd->usecnt);
+ kfree(mw);
return ret;
}
@@ -845,8 +848,6 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
* will only be one mm, so no big deal.
*/
mmap_read_lock(mm);
- if (!mmget_still_valid(mm))
- goto skip_mm;
mutex_lock(&ufile->umap_lock);
list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
list) {
@@ -865,7 +866,6 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
}
}
mutex_unlock(&ufile->umap_lock);
- skip_mm:
mmap_read_unlock(mm);
mmput(mm);
}
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index 08c39cfb1bd9..0658101fca00 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -81,12 +81,20 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject,
{
struct ib_rwq_ind_table *rwq_ind_tbl = uobject->object;
struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
- int ret;
+ u32 table_size = (1 << rwq_ind_tbl->log_ind_tbl_size);
+ int ret, i;
+
+ if (atomic_read(&rwq_ind_tbl->usecnt))
+ return -EBUSY;
- ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
+ ret = rwq_ind_tbl->device->ops.destroy_rwq_ind_table(rwq_ind_tbl);
if (ib_is_destroy_retryable(ret, why, uobject))
return ret;
+ for (i = 0; i < table_size; i++)
+ atomic_dec(&ind_tbl[i]->usecnt);
+
+ kfree(rwq_ind_tbl);
kfree(ind_tbl);
return ret;
}
@@ -122,8 +130,7 @@ static int uverbs_free_pd(struct ib_uobject *uobject,
if (ret)
return ret;
- ib_dealloc_pd_user(pd, &attrs->driver_udata);
- return 0;
+ return ib_dealloc_pd_user(pd, &attrs->driver_udata);
}
void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue)
diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
index c7e7438752bc..b3c6c066b601 100644
--- a/drivers/infiniband/core/uverbs_std_types_counters.c
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -46,7 +46,9 @@ static int uverbs_free_counters(struct ib_uobject *uobject,
if (ret)
return ret;
- counters->device->ops.destroy_counters(counters);
+ ret = counters->device->ops.destroy_counters(counters);
+ if (ret)
+ return ret;
kfree(counters);
return 0;
}
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index b1c7dacc02de..8dabd05988b2 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -33,6 +33,7 @@
#include <rdma/uverbs_std_types.h>
#include "rdma_core.h"
#include "uverbs.h"
+#include "restrack.h"
static int uverbs_free_cq(struct ib_uobject *uobject,
enum rdma_remove_reason why,
@@ -123,7 +124,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
cq->event_handler = ib_uverbs_cq_event_handler;
cq->cq_context = ev_file ? &ev_file->ev_queue : NULL;
atomic_set(&cq->usecnt, 0);
- cq->res.type = RDMA_RESTRACK_CQ;
+
+ rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+ rdma_restrack_set_name(&cq->res, NULL);
ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata);
if (ret)
@@ -131,7 +134,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
obj->uevent.uobject.object = cq;
obj->uevent.uobject.user_handle = user_handle;
- rdma_restrack_uadd(&cq->res);
+ rdma_restrack_add(&cq->res);
uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE);
ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe,
@@ -139,6 +142,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
return ret;
err_free:
+ rdma_restrack_put(&cq->res);
kfree(cq);
err_event_file:
if (obj->uevent.event_file)
diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c
index 75df2094a010..f367d523a46b 100644
--- a/drivers/infiniband/core/uverbs_std_types_device.c
+++ b/drivers/infiniband/core/uverbs_std_types_device.c
@@ -3,11 +3,13 @@
* Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
*/
+#include <linux/overflow.h>
#include <rdma/uverbs_std_types.h>
#include "rdma_core.h"
#include "uverbs.h"
#include <rdma/uverbs_ioctl.h>
#include <rdma/opa_addr.h>
+#include <rdma/ib_cache.h>
/*
* This ioctl method allows calling any defined write or write_ex
@@ -165,7 +167,8 @@ void copy_port_attr_to_resp(struct ib_port_attr *attr,
resp->subnet_timeout = attr->subnet_timeout;
resp->init_type_reply = attr->init_type_reply;
resp->active_width = attr->active_width;
- resp->active_speed = attr->active_speed;
+ /* This ABI needs to be extended to provide any speed more than IB_SPEED_NDR */
+ resp->active_speed = min_t(u16, attr->active_speed, IB_SPEED_NDR);
resp->phys_state = attr->phys_state;
resp->link_layer = rdma_port_get_link_layer(ib_dev, port_num);
}
@@ -265,6 +268,172 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_CONTEXT)(
return ucontext->device->ops.query_ucontext(ucontext, attrs);
}
+static int copy_gid_entries_to_user(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_gid_entry *entries,
+ size_t num_entries, size_t user_entry_size)
+{
+ const struct uverbs_attr *attr;
+ void __user *user_entries;
+ size_t copy_len;
+ int ret;
+ int i;
+
+ if (user_entry_size == sizeof(*entries)) {
+ ret = uverbs_copy_to(attrs,
+ UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+ entries, sizeof(*entries) * num_entries);
+ return ret;
+ }
+
+ copy_len = min_t(size_t, user_entry_size, sizeof(*entries));
+ attr = uverbs_attr_get(attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES);
+ if (IS_ERR(attr))
+ return PTR_ERR(attr);
+
+ user_entries = u64_to_user_ptr(attr->ptr_attr.data);
+ for (i = 0; i < num_entries; i++) {
+ if (copy_to_user(user_entries, entries, copy_len))
+ return -EFAULT;
+
+ if (user_entry_size > sizeof(*entries)) {
+ if (clear_user(user_entries + sizeof(*entries),
+ user_entry_size - sizeof(*entries)))
+ return -EFAULT;
+ }
+
+ entries++;
+ user_entries += user_entry_size;
+ }
+
+ return uverbs_output_written(attrs,
+ UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_gid_entry *entries;
+ struct ib_ucontext *ucontext;
+ struct ib_device *ib_dev;
+ size_t user_entry_size;
+ ssize_t num_entries;
+ size_t max_entries;
+ size_t num_bytes;
+ u32 flags;
+ int ret;
+
+ ret = uverbs_get_flags32(&flags, attrs,
+ UVERBS_ATTR_QUERY_GID_TABLE_FLAGS, 0);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_const(&user_entry_size, attrs,
+ UVERBS_ATTR_QUERY_GID_TABLE_ENTRY_SIZE);
+ if (ret)
+ return ret;
+
+ max_entries = uverbs_attr_ptr_get_array_size(
+ attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+ user_entry_size);
+ if (max_entries <= 0)
+ return -EINVAL;
+
+ ucontext = ib_uverbs_get_ucontext(attrs);
+ if (IS_ERR(ucontext))
+ return PTR_ERR(ucontext);
+ ib_dev = ucontext->device;
+
+ if (check_mul_overflow(max_entries, sizeof(*entries), &num_bytes))
+ return -EINVAL;
+
+ entries = uverbs_zalloc(attrs, num_bytes);
+ if (!entries)
+ return -ENOMEM;
+
+ num_entries = rdma_query_gid_table(ib_dev, entries, max_entries);
+ if (num_entries < 0)
+ return -EINVAL;
+
+ ret = copy_gid_entries_to_user(attrs, entries, num_entries,
+ user_entry_size);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs,
+ UVERBS_ATTR_QUERY_GID_TABLE_RESP_NUM_ENTRIES,
+ &num_entries, sizeof(num_entries));
+ return ret;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_ENTRY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_gid_entry entry = {};
+ const struct ib_gid_attr *gid_attr;
+ struct ib_ucontext *ucontext;
+ struct ib_device *ib_dev;
+ struct net_device *ndev;
+ u32 gid_index;
+ u32 port_num;
+ u32 flags;
+ int ret;
+
+ ret = uverbs_get_flags32(&flags, attrs,
+ UVERBS_ATTR_QUERY_GID_ENTRY_FLAGS, 0);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_const(&port_num, attrs,
+ UVERBS_ATTR_QUERY_GID_ENTRY_PORT);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_const(&gid_index, attrs,
+ UVERBS_ATTR_QUERY_GID_ENTRY_GID_INDEX);
+ if (ret)
+ return ret;
+
+ ucontext = ib_uverbs_get_ucontext(attrs);
+ if (IS_ERR(ucontext))
+ return PTR_ERR(ucontext);
+ ib_dev = ucontext->device;
+
+ if (!rdma_is_port_valid(ib_dev, port_num))
+ return -EINVAL;
+
+ if (!rdma_ib_or_roce(ib_dev, port_num))
+ return -EOPNOTSUPP;
+
+ gid_attr = rdma_get_gid_attr(ib_dev, port_num, gid_index);
+ if (IS_ERR(gid_attr))
+ return PTR_ERR(gid_attr);
+
+ memcpy(&entry.gid, &gid_attr->gid, sizeof(gid_attr->gid));
+ entry.gid_index = gid_attr->index;
+ entry.port_num = gid_attr->port_num;
+ entry.gid_type = gid_attr->gid_type;
+
+ rcu_read_lock();
+ ndev = rdma_read_gid_attr_ndev_rcu(gid_attr);
+ if (IS_ERR(ndev)) {
+ if (PTR_ERR(ndev) != -ENODEV) {
+ ret = PTR_ERR(ndev);
+ rcu_read_unlock();
+ goto out;
+ }
+ } else {
+ entry.netdev_ifindex = ndev->ifindex;
+ }
+ rcu_read_unlock();
+
+ ret = uverbs_copy_to_struct_or_zero(
+ attrs, UVERBS_ATTR_QUERY_GID_ENTRY_RESP_ENTRY, &entry,
+ sizeof(entry));
+out:
+ rdma_put_gid_attr(gid_attr);
+ return ret;
+}
+
DECLARE_UVERBS_NAMED_METHOD(
UVERBS_METHOD_GET_CONTEXT,
UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS,
@@ -299,12 +468,38 @@ DECLARE_UVERBS_NAMED_METHOD(
reserved),
UA_MANDATORY));
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QUERY_GID_TABLE,
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_GID_TABLE_ENTRY_SIZE, u64,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_QUERY_GID_TABLE_FLAGS, u32,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+ UVERBS_ATTR_MIN_SIZE(0), UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_GID_TABLE_RESP_NUM_ENTRIES,
+ UVERBS_ATTR_TYPE(u64), UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QUERY_GID_ENTRY,
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_GID_ENTRY_PORT, u32,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_GID_ENTRY_GID_INDEX, u32,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_QUERY_GID_ENTRY_FLAGS, u32,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_GID_ENTRY_RESP_ENTRY,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_gid_entry,
+ netdev_ifindex),
+ UA_MANDATORY));
+
DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE,
&UVERBS_METHOD(UVERBS_METHOD_GET_CONTEXT),
&UVERBS_METHOD(UVERBS_METHOD_INVOKE_WRITE),
&UVERBS_METHOD(UVERBS_METHOD_INFO_HANDLES),
&UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT),
- &UVERBS_METHOD(UVERBS_METHOD_QUERY_CONTEXT));
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_CONTEXT),
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_GID_TABLE),
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_GID_ENTRY));
const struct uapi_definition uverbs_def_obj_device[] = {
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DEVICE),
diff --git a/drivers/infiniband/core/uverbs_std_types_wq.c b/drivers/infiniband/core/uverbs_std_types_wq.c
index cad842ede077..f2e6a625724a 100644
--- a/drivers/infiniband/core/uverbs_std_types_wq.c
+++ b/drivers/infiniband/core/uverbs_std_types_wq.c
@@ -16,7 +16,7 @@ static int uverbs_free_wq(struct ib_uobject *uobject,
container_of(uobject, struct ib_uwq_object, uevent.uobject);
int ret;
- ret = ib_destroy_wq(wq, &attrs->driver_udata);
+ ret = ib_destroy_wq_user(wq, &attrs->driver_udata);
if (ib_is_destroy_retryable(ret, why, uobject))
return ret;
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 307886737646..740f8454b6b4 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -272,15 +272,16 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
atomic_set(&pd->usecnt, 0);
pd->flags = flags;
- pd->res.type = RDMA_RESTRACK_PD;
- rdma_restrack_set_task(&pd->res, caller);
+ rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD);
+ rdma_restrack_set_name(&pd->res, caller);
ret = device->ops.alloc_pd(pd, NULL);
if (ret) {
+ rdma_restrack_put(&pd->res);
kfree(pd);
return ERR_PTR(ret);
}
- rdma_restrack_kadd(&pd->res);
+ rdma_restrack_add(&pd->res);
if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
pd->local_dma_lkey = device->local_dma_lkey;
@@ -329,7 +330,7 @@ EXPORT_SYMBOL(__ib_alloc_pd);
* exist. The caller is responsible to synchronously destroy them and
* guarantee no new allocations will happen.
*/
-void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata)
+int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata)
{
int ret;
@@ -343,9 +344,13 @@ void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata)
requires the caller to guarantee we can't race here. */
WARN_ON(atomic_read(&pd->usecnt));
+ ret = pd->device->ops.dealloc_pd(pd, udata);
+ if (ret)
+ return ret;
+
rdma_restrack_del(&pd->res);
- pd->device->ops.dealloc_pd(pd, udata);
kfree(pd);
+ return ret;
}
EXPORT_SYMBOL(ib_dealloc_pd_user);
@@ -728,7 +733,7 @@ int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
(struct in6_addr *)dgid);
return 0;
} else if (net_type == RDMA_NETWORK_IPV6 ||
- net_type == RDMA_NETWORK_IB) {
+ net_type == RDMA_NETWORK_IB || RDMA_NETWORK_ROCE_V1) {
*dgid = hdr->ibgrh.dgid;
*sgid = hdr->ibgrh.sgid;
return 0;
@@ -964,18 +969,22 @@ int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata)
{
const struct ib_gid_attr *sgid_attr = ah->sgid_attr;
struct ib_pd *pd;
+ int ret;
might_sleep_if(flags & RDMA_DESTROY_AH_SLEEPABLE);
pd = ah->pd;
- ah->device->ops.destroy_ah(ah, flags);
+ ret = ah->device->ops.destroy_ah(ah, flags);
+ if (ret)
+ return ret;
+
atomic_dec(&pd->usecnt);
if (sgid_attr)
rdma_put_gid_attr(sgid_attr);
kfree(ah);
- return 0;
+ return ret;
}
EXPORT_SYMBOL(rdma_destroy_ah_user);
@@ -1060,10 +1069,14 @@ EXPORT_SYMBOL(ib_query_srq);
int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata)
{
+ int ret;
+
if (atomic_read(&srq->usecnt))
return -EBUSY;
- srq->device->ops.destroy_srq(srq, udata);
+ ret = srq->device->ops.destroy_srq(srq, udata);
+ if (ret)
+ return ret;
atomic_dec(&srq->pd->usecnt);
if (srq->srq_type == IB_SRQT_XRC)
@@ -1072,7 +1085,7 @@ int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata)
atomic_dec(&srq->ext.cq->usecnt);
kfree(srq);
- return 0;
+ return ret;
}
EXPORT_SYMBOL(ib_destroy_srq_user);
@@ -1781,7 +1794,7 @@ int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
}
EXPORT_SYMBOL(ib_modify_qp_with_udata);
-int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width)
+int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u16 *speed, u8 *width)
{
int rc;
u32 netdev_speed;
@@ -1984,16 +1997,18 @@ struct ib_cq *__ib_create_cq(struct ib_device *device,
cq->event_handler = event_handler;
cq->cq_context = cq_context;
atomic_set(&cq->usecnt, 0);
- cq->res.type = RDMA_RESTRACK_CQ;
- rdma_restrack_set_task(&cq->res, caller);
+
+ rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+ rdma_restrack_set_name(&cq->res, caller);
ret = device->ops.create_cq(cq, cq_attr, NULL);
if (ret) {
+ rdma_restrack_put(&cq->res);
kfree(cq);
return ERR_PTR(ret);
}
- rdma_restrack_kadd(&cq->res);
+ rdma_restrack_add(&cq->res);
return cq;
}
EXPORT_SYMBOL(__ib_create_cq);
@@ -2011,16 +2026,21 @@ EXPORT_SYMBOL(rdma_set_cq_moderation);
int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata)
{
+ int ret;
+
if (WARN_ON_ONCE(cq->shared))
return -EOPNOTSUPP;
if (atomic_read(&cq->usecnt))
return -EBUSY;
+ ret = cq->device->ops.destroy_cq(cq, udata);
+ if (ret)
+ return ret;
+
rdma_restrack_del(&cq->res);
- cq->device->ops.destroy_cq(cq, udata);
kfree(cq);
- return 0;
+ return ret;
}
EXPORT_SYMBOL(ib_destroy_cq_user);
@@ -2059,8 +2079,10 @@ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mr->pd = pd;
mr->dm = NULL;
atomic_inc(&pd->usecnt);
- mr->res.type = RDMA_RESTRACK_MR;
- rdma_restrack_kadd(&mr->res);
+
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_parent_name(&mr->res, &pd->res);
+ rdma_restrack_add(&mr->res);
return mr;
}
@@ -2139,11 +2161,12 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
mr->need_inval = false;
- mr->res.type = RDMA_RESTRACK_MR;
- rdma_restrack_kadd(&mr->res);
mr->type = mr_type;
mr->sig_attrs = NULL;
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_parent_name(&mr->res, &pd->res);
+ rdma_restrack_add(&mr->res);
out:
trace_mr_alloc(pd, mr_type, max_num_sg, mr);
return mr;
@@ -2199,11 +2222,12 @@ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd,
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
mr->need_inval = false;
- mr->res.type = RDMA_RESTRACK_MR;
- rdma_restrack_kadd(&mr->res);
mr->type = IB_MR_TYPE_INTEGRITY;
mr->sig_attrs = sig_attrs;
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_parent_name(&mr->res, &pd->res);
+ rdma_restrack_add(&mr->res);
out:
trace_mr_integ_alloc(pd, max_num_data_sg, max_num_meta_sg, mr);
return mr;
@@ -2328,13 +2352,17 @@ EXPORT_SYMBOL(ib_alloc_xrcd_user);
*/
int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata)
{
+ int ret;
+
if (atomic_read(&xrcd->usecnt))
return -EBUSY;
WARN_ON(!xa_empty(&xrcd->tgt_qps));
- xrcd->device->ops.dealloc_xrcd(xrcd, udata);
+ ret = xrcd->device->ops.dealloc_xrcd(xrcd, udata);
+ if (ret)
+ return ret;
kfree(xrcd);
- return 0;
+ return ret;
}
EXPORT_SYMBOL(ib_dealloc_xrcd_user);
@@ -2378,25 +2406,28 @@ struct ib_wq *ib_create_wq(struct ib_pd *pd,
EXPORT_SYMBOL(ib_create_wq);
/**
- * ib_destroy_wq - Destroys the specified user WQ.
+ * ib_destroy_wq_user - Destroys the specified user WQ.
* @wq: The WQ to destroy.
* @udata: Valid user data
*/
-int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
+int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata)
{
struct ib_cq *cq = wq->cq;
struct ib_pd *pd = wq->pd;
+ int ret;
if (atomic_read(&wq->usecnt))
return -EBUSY;
- wq->device->ops.destroy_wq(wq, udata);
+ ret = wq->device->ops.destroy_wq(wq, udata);
+ if (ret)
+ return ret;
+
atomic_dec(&pd->usecnt);
atomic_dec(&cq->usecnt);
-
- return 0;
+ return ret;
}
-EXPORT_SYMBOL(ib_destroy_wq);
+EXPORT_SYMBOL(ib_destroy_wq_user);
/**
* ib_modify_wq - Modifies the specified WQ.
@@ -2419,29 +2450,6 @@ int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
}
EXPORT_SYMBOL(ib_modify_wq);
-/*
- * ib_destroy_rwq_ind_table - Destroys the specified Indirection Table.
- * @wq_ind_table: The Indirection Table to destroy.
-*/
-int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table)
-{
- int err, i;
- u32 table_size = (1 << rwq_ind_table->log_ind_tbl_size);
- struct ib_wq **ind_tbl = rwq_ind_table->ind_tbl;
-
- if (atomic_read(&rwq_ind_table->usecnt))
- return -EBUSY;
-
- err = rwq_ind_table->device->ops.destroy_rwq_ind_table(rwq_ind_table);
- if (!err) {
- for (i = 0; i < table_size; i++)
- atomic_dec(&ind_tbl[i]->usecnt);
- }
-
- return err;
-}
-EXPORT_SYMBOL(ib_destroy_rwq_ind_table);
-
int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status)
{
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index a300588634c5..b930ea3dab7a 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -150,7 +150,7 @@ struct bnxt_re_dev {
struct delayed_work worker;
u8 cur_prio_map;
- u8 active_speed;
+ u16 active_speed;
u8 active_width;
/* FP Notification Queue (CQ & SRQ) */
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 1d7a9ca5240c..cf3db9628397 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -532,7 +532,7 @@ fail:
}
/* Protection Domains */
-void bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata)
+int bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata)
{
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
struct bnxt_re_dev *rdev = pd->rdev;
@@ -542,6 +542,7 @@ void bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata)
if (pd->qplib_pd.id)
bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
&pd->qplib_pd);
+ return 0;
}
int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
@@ -601,13 +602,14 @@ fail:
}
/* Address Handles */
-void bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags)
+int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags)
{
struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah);
struct bnxt_re_dev *rdev = ah->rdev;
bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah,
!(flags & RDMA_DESTROY_AH_SLEEPABLE));
+ return 0;
}
static u8 bnxt_re_stack_to_dev_nw_type(enum rdma_network_type ntype)
@@ -938,9 +940,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
return PTR_ERR(umem);
qp->sumem = umem;
- qplib_qp->sq.sg_info.sghead = umem->sg_head.sgl;
- qplib_qp->sq.sg_info.npages = ib_umem_num_pages(umem);
- qplib_qp->sq.sg_info.nmap = umem->nmap;
+ qplib_qp->sq.sg_info.umem = umem;
qplib_qp->sq.sg_info.pgsize = PAGE_SIZE;
qplib_qp->sq.sg_info.pgshft = PAGE_SHIFT;
qplib_qp->qp_handle = ureq.qp_handle;
@@ -953,9 +953,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
if (IS_ERR(umem))
goto rqfail;
qp->rumem = umem;
- qplib_qp->rq.sg_info.sghead = umem->sg_head.sgl;
- qplib_qp->rq.sg_info.npages = ib_umem_num_pages(umem);
- qplib_qp->rq.sg_info.nmap = umem->nmap;
+ qplib_qp->rq.sg_info.umem = umem;
qplib_qp->rq.sg_info.pgsize = PAGE_SIZE;
qplib_qp->rq.sg_info.pgshft = PAGE_SHIFT;
}
@@ -1568,7 +1566,7 @@ static enum ib_mtu __to_ib_mtu(u32 mtu)
}
/* Shared Receive Queues */
-void bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata)
+int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata)
{
struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq,
ib_srq);
@@ -1583,6 +1581,7 @@ void bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata)
atomic_dec(&rdev->srq_count);
if (nq)
nq->budget--;
+ return 0;
}
static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev,
@@ -1608,9 +1607,7 @@ static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev,
return PTR_ERR(umem);
srq->umem = umem;
- qplib_srq->sg_info.sghead = umem->sg_head.sgl;
- qplib_srq->sg_info.npages = ib_umem_num_pages(umem);
- qplib_srq->sg_info.nmap = umem->nmap;
+ qplib_srq->sg_info.umem = umem;
qplib_srq->sg_info.pgsize = PAGE_SIZE;
qplib_srq->sg_info.pgshft = PAGE_SHIFT;
qplib_srq->srq_handle = ureq.srq_handle;
@@ -2800,7 +2797,7 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr,
}
/* Completion Queues */
-void bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
+int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
struct bnxt_re_cq *cq;
struct bnxt_qplib_nq *nq;
@@ -2816,6 +2813,7 @@ void bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
atomic_dec(&rdev->cq_count);
nq->budget--;
kfree(cq->cql);
+ return 0;
}
int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
@@ -2860,9 +2858,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
rc = PTR_ERR(cq->umem);
goto fail;
}
- cq->qplib_cq.sg_info.sghead = cq->umem->sg_head.sgl;
- cq->qplib_cq.sg_info.npages = ib_umem_num_pages(cq->umem);
- cq->qplib_cq.sg_info.nmap = cq->umem->nmap;
+ cq->qplib_cq.sg_info.umem = cq->umem;
cq->qplib_cq.dpi = &uctx->dpi;
} else {
cq->max_cql = min_t(u32, entries, MAX_CQL_PER_POLL);
@@ -3774,23 +3770,6 @@ int bnxt_re_dealloc_mw(struct ib_mw *ib_mw)
return rc;
}
-static int bnxt_re_page_size_ok(int page_shift)
-{
- switch (page_shift) {
- case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4K:
- case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_8K:
- case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_64K:
- case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_2M:
- case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_256K:
- case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1M:
- case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4M:
- case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1G:
- return 1;
- default:
- return 0;
- }
-}
-
static int fill_umem_pbl_tbl(struct ib_umem *umem, u64 *pbl_tbl_orig,
int page_shift)
{
@@ -3798,7 +3777,7 @@ static int fill_umem_pbl_tbl(struct ib_umem *umem, u64 *pbl_tbl_orig,
u64 page_size = BIT_ULL(page_shift);
struct ib_block_iter biter;
- rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, page_size)
+ rdma_umem_for_each_dma_block(umem, &biter, page_size)
*pbl_tbl++ = rdma_block_iter_dma_address(&biter);
return pbl_tbl - pbl_tbl_orig;
@@ -3814,7 +3793,8 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
struct bnxt_re_mr *mr;
struct ib_umem *umem;
u64 *pbl_tbl = NULL;
- int umem_pgs, page_shift, rc;
+ unsigned long page_size;
+ int umem_pgs, rc;
if (length > BNXT_RE_MAX_MR_SIZE) {
ibdev_err(&rdev->ibdev, "MR Size: %lld > Max supported:%lld\n",
@@ -3848,42 +3828,34 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
mr->ib_umem = umem;
mr->qplib_mr.va = virt_addr;
- umem_pgs = ib_umem_page_count(umem);
- if (!umem_pgs) {
- ibdev_err(&rdev->ibdev, "umem is invalid!");
- rc = -EINVAL;
- goto free_umem;
- }
- mr->qplib_mr.total_size = length;
-
- pbl_tbl = kcalloc(umem_pgs, sizeof(u64 *), GFP_KERNEL);
- if (!pbl_tbl) {
- rc = -ENOMEM;
- goto free_umem;
- }
-
- page_shift = __ffs(ib_umem_find_best_pgsz(umem,
- BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M,
- virt_addr));
-
- if (!bnxt_re_page_size_ok(page_shift)) {
+ page_size = ib_umem_find_best_pgsz(
+ umem, BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M, virt_addr);
+ if (!page_size) {
ibdev_err(&rdev->ibdev, "umem page size unsupported!");
rc = -EFAULT;
- goto fail;
+ goto free_umem;
}
+ mr->qplib_mr.total_size = length;
- if (page_shift == BNXT_RE_PAGE_SHIFT_4K &&
+ if (page_size == BNXT_RE_PAGE_SIZE_4K &&
length > BNXT_RE_MAX_MR_SIZE_LOW) {
ibdev_err(&rdev->ibdev, "Requested MR Sz:%llu Max sup:%llu",
length, (u64)BNXT_RE_MAX_MR_SIZE_LOW);
rc = -EINVAL;
- goto fail;
+ goto free_umem;
+ }
+
+ umem_pgs = ib_umem_num_dma_blocks(umem, page_size);
+ pbl_tbl = kcalloc(umem_pgs, sizeof(*pbl_tbl), GFP_KERNEL);
+ if (!pbl_tbl) {
+ rc = -ENOMEM;
+ goto free_umem;
}
/* Map umem buf ptrs to the PBL */
- umem_pgs = fill_umem_pbl_tbl(umem, pbl_tbl, page_shift);
+ umem_pgs = fill_umem_pbl_tbl(umem, pbl_tbl, order_base_2(page_size));
rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl,
- umem_pgs, false, 1 << page_shift);
+ umem_pgs, false, page_size);
if (rc) {
ibdev_err(&rdev->ibdev, "Failed to register user MR");
goto fail;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index 1daeb30e06fd..9a8130b79256 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -163,12 +163,12 @@ int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num,
enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
u8 port_num);
int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-void bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
+int bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
int bnxt_re_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
-void bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags);
+int bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags);
int bnxt_re_create_srq(struct ib_srq *srq,
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata);
@@ -176,7 +176,7 @@ int bnxt_re_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr,
enum ib_srq_attr_mask srq_attr_mask,
struct ib_udata *udata);
int bnxt_re_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
-void bnxt_re_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
+int bnxt_re_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
int bnxt_re_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *recv_wr,
const struct ib_recv_wr **bad_recv_wr);
struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd,
@@ -193,7 +193,7 @@ int bnxt_re_post_recv(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
const struct ib_recv_wr **bad_recv_wr);
int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
-void bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
+int bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int bnxt_re_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
int bnxt_re_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 53aee5a42ab8..04621ba8fa76 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -736,7 +736,8 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
if (ret)
return ret;
- return ib_register_device(ibdev, "bnxt_re%d");
+ dma_set_max_seg_size(&rdev->en_dev->pdev->dev, UINT_MAX);
+ return ib_register_device(ibdev, "bnxt_re%d", &rdev->en_dev->pdev->dev);
}
static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev)
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index f78da54a0bc5..995d4633b0a1 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -295,9 +295,9 @@ static void __wait_for_all_nqes(struct bnxt_qplib_cq *cq, u16 cnq_events)
}
}
-static void bnxt_qplib_service_nq(unsigned long data)
+static void bnxt_qplib_service_nq(struct tasklet_struct *t)
{
- struct bnxt_qplib_nq *nq = (struct bnxt_qplib_nq *)data;
+ struct bnxt_qplib_nq *nq = from_tasklet(nq, t, nq_tasklet);
struct bnxt_qplib_hwq *hwq = &nq->hwq;
int num_srqne_processed = 0;
int num_cqne_processed = 0;
@@ -448,8 +448,7 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
nq->msix_vec = msix_vector;
if (need_init)
- tasklet_init(&nq->nq_tasklet, bnxt_qplib_service_nq,
- (unsigned long)nq);
+ tasklet_setup(&nq->nq_tasklet, bnxt_qplib_service_nq);
else
tasklet_enable(&nq->nq_tasklet);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index f7736e34ac64..441eb421e5e5 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -50,7 +50,7 @@
#include "qplib_sp.h"
#include "qplib_fp.h"
-static void bnxt_qplib_service_creq(unsigned long data);
+static void bnxt_qplib_service_creq(struct tasklet_struct *t);
/* Hardware communication channel */
static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
@@ -79,7 +79,7 @@ static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
goto done;
do {
mdelay(1); /* 1m sec */
- bnxt_qplib_service_creq((unsigned long)rcfw);
+ bnxt_qplib_service_creq(&rcfw->creq.creq_tasklet);
} while (test_bit(cbit, cmdq->cmdq_bitmap) && --count);
done:
return count ? 0 : -ETIMEDOUT;
@@ -370,9 +370,9 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
}
/* SP - CREQ Completion handlers */
-static void bnxt_qplib_service_creq(unsigned long data)
+static void bnxt_qplib_service_creq(struct tasklet_struct *t)
{
- struct bnxt_qplib_rcfw *rcfw = (struct bnxt_qplib_rcfw *)data;
+ struct bnxt_qplib_rcfw *rcfw = from_tasklet(rcfw, t, creq.creq_tasklet);
struct bnxt_qplib_creq_ctx *creq = &rcfw->creq;
u32 type, budget = CREQ_ENTRY_POLL_BUDGET;
struct bnxt_qplib_hwq *hwq = &creq->hwq;
@@ -687,8 +687,7 @@ int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector,
creq->msix_vec = msix_vector;
if (need_init)
- tasklet_init(&creq->creq_tasklet,
- bnxt_qplib_service_creq, (unsigned long)rcfw);
+ tasklet_setup(&creq->creq_tasklet, bnxt_qplib_service_creq);
else
tasklet_enable(&creq->creq_tasklet);
rc = request_irq(creq->msix_vec, bnxt_qplib_creq_irq, 0,
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index 7efa6e5dce62..fa7878336100 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -45,6 +45,9 @@
#include <linux/dma-mapping.h>
#include <linux/if_vlan.h>
#include <linux/vmalloc.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+
#include "roce_hsi.h"
#include "qplib_res.h"
#include "qplib_sp.h"
@@ -87,12 +90,11 @@ static void __free_pbl(struct bnxt_qplib_res *res, struct bnxt_qplib_pbl *pbl,
static void bnxt_qplib_fill_user_dma_pages(struct bnxt_qplib_pbl *pbl,
struct bnxt_qplib_sg_info *sginfo)
{
- struct scatterlist *sghead = sginfo->sghead;
- struct sg_dma_page_iter sg_iter;
+ struct ib_block_iter biter;
int i = 0;
- for_each_sg_dma_page(sghead, &sg_iter, sginfo->nmap, 0) {
- pbl->pg_map_arr[i] = sg_page_iter_dma_address(&sg_iter);
+ rdma_umem_for_each_dma_block(sginfo->umem, &biter, sginfo->pgsize) {
+ pbl->pg_map_arr[i] = rdma_block_iter_dma_address(&biter);
pbl->pg_arr[i] = NULL;
pbl->pg_count++;
i++;
@@ -104,15 +106,16 @@ static int __alloc_pbl(struct bnxt_qplib_res *res,
struct bnxt_qplib_sg_info *sginfo)
{
struct pci_dev *pdev = res->pdev;
- struct scatterlist *sghead;
bool is_umem = false;
u32 pages;
int i;
if (sginfo->nopte)
return 0;
- pages = sginfo->npages;
- sghead = sginfo->sghead;
+ if (sginfo->umem)
+ pages = ib_umem_num_dma_blocks(sginfo->umem, sginfo->pgsize);
+ else
+ pages = sginfo->npages;
/* page ptr arrays */
pbl->pg_arr = vmalloc(pages * sizeof(void *));
if (!pbl->pg_arr)
@@ -127,7 +130,7 @@ static int __alloc_pbl(struct bnxt_qplib_res *res,
pbl->pg_count = 0;
pbl->pg_size = sginfo->pgsize;
- if (!sghead) {
+ if (!sginfo->umem) {
for (i = 0; i < pages; i++) {
pbl->pg_arr[i] = dma_alloc_coherent(&pdev->dev,
pbl->pg_size,
@@ -183,14 +186,12 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq,
struct bnxt_qplib_sg_info sginfo = {};
u32 depth, stride, npbl, npde;
dma_addr_t *src_phys_ptr, **dst_virt_ptr;
- struct scatterlist *sghead = NULL;
struct bnxt_qplib_res *res;
struct pci_dev *pdev;
int i, rc, lvl;
res = hwq_attr->res;
pdev = res->pdev;
- sghead = hwq_attr->sginfo->sghead;
pg_size = hwq_attr->sginfo->pgsize;
hwq->level = PBL_LVL_MAX;
@@ -204,7 +205,7 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq,
aux_pages++;
}
- if (!sghead) {
+ if (!hwq_attr->sginfo->umem) {
hwq->is_user = false;
npages = (depth * stride) / pg_size + aux_pages;
if ((depth * stride) % pg_size)
@@ -213,11 +214,14 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq,
return -EINVAL;
hwq_attr->sginfo->npages = npages;
} else {
+ unsigned long sginfo_num_pages = ib_umem_num_dma_blocks(
+ hwq_attr->sginfo->umem, hwq_attr->sginfo->pgsize);
+
hwq->is_user = true;
- npages = hwq_attr->sginfo->npages;
+ npages = sginfo_num_pages;
npages = (npages * PAGE_SIZE) /
BIT_ULL(hwq_attr->sginfo->pgshft);
- if ((hwq_attr->sginfo->npages * PAGE_SIZE) %
+ if ((sginfo_num_pages * PAGE_SIZE) %
BIT_ULL(hwq_attr->sginfo->pgshft))
if (!npages)
npages++;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index 9da470d1e4a3..7a1ab38b95da 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -126,8 +126,7 @@ struct bnxt_qplib_pbl {
};
struct bnxt_qplib_sg_info {
- struct scatterlist *sghead;
- u32 nmap;
+ struct ib_umem *umem;
u32 npages;
u32 pgshft;
u32 pgsize;
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 1f288c73ccfc..8769e7aa097f 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -77,9 +77,9 @@ static int enable_ecn;
module_param(enable_ecn, int, 0644);
MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)");
-static int dack_mode = 1;
+static int dack_mode;
module_param(dack_mode, int, 0644);
-MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)");
+MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=0)");
uint c4iw_max_read_depth = 32;
module_param(c4iw_max_read_depth, int, 0644);
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 352b8af1998a..28349ed50885 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -967,7 +967,7 @@ int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
return !err || err == -ENODATA ? npolled : err;
}
-void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
+int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
struct c4iw_cq *chp;
struct c4iw_ucontext *ucontext;
@@ -985,6 +985,7 @@ void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx,
chp->destroy_skb, chp->wr_waitp);
c4iw_put_wr_wait(chp->wr_waitp);
+ return 0;
}
int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 2b2b009b371a..a27899402f59 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -985,21 +985,20 @@ int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
int c4iw_dealloc_mw(struct ib_mw *mw);
void c4iw_dealloc(struct uld_ctx *ctx);
-struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata);
+int c4iw_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
u64 length, u64 virt, int acc,
struct ib_udata *udata);
struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc);
int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata);
-void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
+int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask srq_attr_mask,
struct ib_udata *udata);
-void c4iw_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata);
+int c4iw_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata);
int c4iw_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *attrs,
struct ib_udata *udata);
int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 73936c3341b7..42234df896fb 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -510,7 +510,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
__be64 *pages;
int shift, n, i;
int err = -ENOMEM;
- struct sg_dma_page_iter sg_iter;
+ struct ib_block_iter biter;
struct c4iw_dev *rhp;
struct c4iw_pd *php;
struct c4iw_mr *mhp;
@@ -548,7 +548,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
shift = PAGE_SHIFT;
- n = ib_umem_num_pages(mhp->umem);
+ n = ib_umem_num_dma_blocks(mhp->umem, 1 << shift);
err = alloc_pbl(mhp, n);
if (err)
goto err_umem_release;
@@ -561,8 +561,8 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
i = n = 0;
- for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) {
- pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter));
+ rdma_umem_for_each_dma_block(mhp->umem, &biter, 1 << shift) {
+ pages[i++] = cpu_to_be64(rdma_block_iter_dma_address(&biter));
if (i == PAGE_SIZE / sizeof(*pages)) {
err = write_pbl(&mhp->rhp->rdev, pages,
mhp->attr.pbl_addr + (n << 3), i,
@@ -611,30 +611,23 @@ err_free_mhp:
return ERR_PTR(err);
}
-struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata)
+int c4iw_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
{
+ struct c4iw_mw *mhp = to_c4iw_mw(ibmw);
struct c4iw_dev *rhp;
struct c4iw_pd *php;
- struct c4iw_mw *mhp;
u32 mmid;
u32 stag = 0;
int ret;
- if (type != IB_MW_TYPE_1)
- return ERR_PTR(-EINVAL);
+ if (ibmw->type != IB_MW_TYPE_1)
+ return -EINVAL;
- php = to_c4iw_pd(pd);
+ php = to_c4iw_pd(ibmw->pd);
rhp = php->rhp;
- mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
- if (!mhp)
- return ERR_PTR(-ENOMEM);
-
mhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
- if (!mhp->wr_waitp) {
- ret = -ENOMEM;
- goto free_mhp;
- }
+ if (!mhp->wr_waitp)
+ return -ENOMEM;
mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL);
if (!mhp->dereg_skb) {
@@ -645,18 +638,19 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
ret = allocate_window(&rhp->rdev, &stag, php->pdid, mhp->wr_waitp);
if (ret)
goto free_skb;
+
mhp->rhp = rhp;
mhp->attr.pdid = php->pdid;
mhp->attr.type = FW_RI_STAG_MW;
mhp->attr.stag = stag;
mmid = (stag) >> 8;
- mhp->ibmw.rkey = stag;
+ ibmw->rkey = stag;
if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) {
ret = -ENOMEM;
goto dealloc_win;
}
pr_debug("mmid 0x%x mhp %p stag 0x%x\n", mmid, mhp, stag);
- return &(mhp->ibmw);
+ return 0;
dealloc_win:
deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb,
@@ -665,9 +659,7 @@ free_skb:
kfree_skb(mhp->dereg_skb);
free_wr_wait:
c4iw_put_wr_wait(mhp->wr_waitp);
-free_mhp:
- kfree(mhp);
- return ERR_PTR(ret);
+ return ret;
}
int c4iw_dealloc_mw(struct ib_mw *mw)
@@ -684,8 +676,6 @@ int c4iw_dealloc_mw(struct ib_mw *mw)
mhp->wr_waitp);
kfree_skb(mhp->dereg_skb);
c4iw_put_wr_wait(mhp->wr_waitp);
- pr_debug("ib_mw %p mmid 0x%x ptr %p\n", mw, mmid, mhp);
- kfree(mhp);
return 0;
}
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 6c579d2d3997..8138c57a1e43 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -190,7 +190,7 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
return ret;
}
-static void c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata)
+static int c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata)
{
struct c4iw_dev *rhp;
struct c4iw_pd *php;
@@ -202,6 +202,7 @@ static void c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata)
mutex_lock(&rhp->rdev.stats.lock);
rhp->rdev.stats.pd.cur--;
mutex_unlock(&rhp->rdev.stats.lock);
+ return 0;
}
static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_udata *udata)
@@ -497,8 +498,10 @@ static const struct ib_device_ops c4iw_dev_ops = {
.query_qp = c4iw_ib_query_qp,
.reg_user_mr = c4iw_reg_user_mr,
.req_notify_cq = c4iw_arm_cq,
- INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd),
+
INIT_RDMA_OBJ_SIZE(ib_cq, c4iw_cq, ibcq),
+ INIT_RDMA_OBJ_SIZE(ib_mw, c4iw_mw, ibmw),
+ INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_srq, c4iw_srq, ibsrq),
INIT_RDMA_OBJ_SIZE(ib_ucontext, c4iw_ucontext, ibucontext),
};
@@ -567,7 +570,9 @@ void c4iw_register_device(struct work_struct *work)
ret = set_netdevs(&dev->ibdev, &dev->rdev);
if (ret)
goto err_dealloc_ctx;
- ret = ib_register_device(&dev->ibdev, "cxgb4_%d");
+ dma_set_max_seg_size(&dev->rdev.lldi.pdev->dev, UINT_MAX);
+ ret = ib_register_device(&dev->ibdev, "cxgb4_%d",
+ &dev->rdev.lldi.pdev->dev);
if (ret)
goto err_dealloc_ctx;
return;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index cbddb20c6121..f20379e4e2ec 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -2797,7 +2797,7 @@ err_free_wr_wait:
return ret;
}
-void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+int c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
struct c4iw_dev *rhp;
struct c4iw_srq *srq;
@@ -2813,4 +2813,5 @@ void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
srq->wr_waitp);
c4iw_free_srq_idx(&rhp->rdev, srq->idx);
c4iw_put_wr_wait(srq->wr_waitp);
+ return 0;
}
diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h
index 1889dd172a25..e5d9712e98c4 100644
--- a/drivers/infiniband/hw/efa/efa.h
+++ b/drivers/infiniband/hw/efa/efa.h
@@ -33,7 +33,8 @@ struct efa_irq {
char name[EFA_IRQNAME_SIZE];
};
-struct efa_sw_stats {
+/* Don't use anything other than atomic64 */
+struct efa_stats {
atomic64_t alloc_pd_err;
atomic64_t create_qp_err;
atomic64_t create_cq_err;
@@ -41,11 +42,6 @@ struct efa_sw_stats {
atomic64_t alloc_ucontext_err;
atomic64_t create_ah_err;
atomic64_t mmap_err;
-};
-
-/* Don't use anything other than atomic64 */
-struct efa_stats {
- struct efa_sw_stats sw_stats;
atomic64_t keep_alive_rcvd;
};
@@ -134,12 +130,12 @@ int efa_query_gid(struct ib_device *ibdev, u8 port, int index,
int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
u16 *pkey);
int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
-void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata);
-void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
@@ -156,7 +152,7 @@ void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
int efa_create_ah(struct ib_ah *ibah,
struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
-void efa_destroy_ah(struct ib_ah *ibah, u32 flags);
+int efa_destroy_ah(struct ib_ah *ibah, u32 flags);
int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_udata *udata);
enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
index 5484b08bbc5d..b199e4ac6cf9 100644
--- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
+++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
@@ -61,6 +61,8 @@ enum efa_admin_qp_state {
enum efa_admin_get_stats_type {
EFA_ADMIN_GET_STATS_TYPE_BASIC = 0,
+ EFA_ADMIN_GET_STATS_TYPE_MESSAGES = 1,
+ EFA_ADMIN_GET_STATS_TYPE_RDMA_READ = 2,
};
enum efa_admin_get_stats_scope {
@@ -68,14 +70,6 @@ enum efa_admin_get_stats_scope {
EFA_ADMIN_GET_STATS_SCOPE_QUEUE = 1,
};
-enum efa_admin_modify_qp_mask_bits {
- EFA_ADMIN_QP_STATE_BIT = 0,
- EFA_ADMIN_CUR_QP_STATE_BIT = 1,
- EFA_ADMIN_QKEY_BIT = 2,
- EFA_ADMIN_SQ_PSN_BIT = 3,
- EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT = 4,
-};
-
/*
* QP allocation sizes, converted by fabric QueuePair (QP) create command
* from QP capabilities.
@@ -199,8 +193,14 @@ struct efa_admin_modify_qp_cmd {
struct efa_admin_aq_common_desc aq_common_desc;
/*
- * Mask indicating which fields should be updated see enum
- * efa_admin_modify_qp_mask_bits
+ * Mask indicating which fields should be updated
+ * 0 : qp_state
+ * 1 : cur_qp_state
+ * 2 : qkey
+ * 3 : sq_psn
+ * 4 : sq_drained_async_notify
+ * 5 : rnr_retry
+ * 31:6 : reserved
*/
u32 modify_mask;
@@ -222,8 +222,8 @@ struct efa_admin_modify_qp_cmd {
/* Enable async notification when SQ is drained */
u8 sq_drained_async_notify;
- /* MBZ */
- u8 reserved1;
+ /* Number of RNR retries (valid only for SRD QPs) */
+ u8 rnr_retry;
/* MBZ */
u16 reserved2;
@@ -258,8 +258,8 @@ struct efa_admin_query_qp_resp {
/* Indicates that draining is in progress */
u8 sq_draining;
- /* MBZ */
- u8 reserved1;
+ /* Number of RNR retries (valid only for SRD QPs) */
+ u8 rnr_retry;
/* MBZ */
u16 reserved2;
@@ -530,10 +530,36 @@ struct efa_admin_basic_stats {
u64 rx_drops;
};
+struct efa_admin_messages_stats {
+ u64 send_bytes;
+
+ u64 send_wrs;
+
+ u64 recv_bytes;
+
+ u64 recv_wrs;
+};
+
+struct efa_admin_rdma_read_stats {
+ u64 read_wrs;
+
+ u64 read_bytes;
+
+ u64 read_wr_err;
+
+ u64 read_resp_bytes;
+};
+
struct efa_admin_acq_get_stats_resp {
struct efa_admin_acq_common_desc acq_common_desc;
- struct efa_admin_basic_stats basic_stats;
+ union {
+ struct efa_admin_basic_stats basic_stats;
+
+ struct efa_admin_messages_stats messages_stats;
+
+ struct efa_admin_rdma_read_stats rdma_read_stats;
+ } u;
};
struct efa_admin_get_set_feature_common_desc {
@@ -576,7 +602,9 @@ struct efa_admin_feature_device_attr_desc {
/*
* 0 : rdma_read - If set, RDMA Read is supported on
* TX queues
- * 31:1 : reserved - MBZ
+ * 1 : rnr_retry - If set, RNR retry is supported on
+ * modify QP command
+ * 31:2 : reserved - MBZ
*/
u32 device_caps;
@@ -862,6 +890,14 @@ struct efa_admin_host_info {
#define EFA_ADMIN_CREATE_QP_CMD_SQ_VIRT_MASK BIT(0)
#define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_MASK BIT(1)
+/* modify_qp_cmd */
+#define EFA_ADMIN_MODIFY_QP_CMD_QP_STATE_MASK BIT(0)
+#define EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE_MASK BIT(1)
+#define EFA_ADMIN_MODIFY_QP_CMD_QKEY_MASK BIT(2)
+#define EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN_MASK BIT(3)
+#define EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY_MASK BIT(4)
+#define EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY_MASK BIT(5)
+
/* reg_mr_cmd */
#define EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK GENMASK(4, 0)
#define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_MASK BIT(7)
@@ -878,6 +914,7 @@ struct efa_admin_host_info {
/* feature_device_attr_desc */
#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0)
+#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK BIT(1)
/* host_info */
#define EFA_ADMIN_HOST_INFO_DRIVER_MODULE_TYPE_MASK GENMASK(7, 0)
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c
index 6ac23627f65a..f752ef64159c 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.c
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.c
@@ -76,6 +76,7 @@ int efa_com_modify_qp(struct efa_com_dev *edev,
cmd.qkey = params->qkey;
cmd.sq_psn = params->sq_psn;
cmd.sq_drained_async_notify = params->sq_drained_async_notify;
+ cmd.rnr_retry = params->rnr_retry;
err = efa_com_cmd_exec(aq,
(struct efa_admin_aq_entry *)&cmd,
@@ -121,6 +122,7 @@ int efa_com_query_qp(struct efa_com_dev *edev,
result->qkey = resp.qkey;
result->sq_draining = resp.sq_draining;
result->sq_psn = resp.sq_psn;
+ result->rnr_retry = resp.rnr_retry;
return 0;
}
@@ -750,11 +752,27 @@ int efa_com_get_stats(struct efa_com_dev *edev,
return err;
}
- result->basic_stats.tx_bytes = resp.basic_stats.tx_bytes;
- result->basic_stats.tx_pkts = resp.basic_stats.tx_pkts;
- result->basic_stats.rx_bytes = resp.basic_stats.rx_bytes;
- result->basic_stats.rx_pkts = resp.basic_stats.rx_pkts;
- result->basic_stats.rx_drops = resp.basic_stats.rx_drops;
+ switch (cmd.type) {
+ case EFA_ADMIN_GET_STATS_TYPE_BASIC:
+ result->basic_stats.tx_bytes = resp.u.basic_stats.tx_bytes;
+ result->basic_stats.tx_pkts = resp.u.basic_stats.tx_pkts;
+ result->basic_stats.rx_bytes = resp.u.basic_stats.rx_bytes;
+ result->basic_stats.rx_pkts = resp.u.basic_stats.rx_pkts;
+ result->basic_stats.rx_drops = resp.u.basic_stats.rx_drops;
+ break;
+ case EFA_ADMIN_GET_STATS_TYPE_MESSAGES:
+ result->messages_stats.send_bytes = resp.u.messages_stats.send_bytes;
+ result->messages_stats.send_wrs = resp.u.messages_stats.send_wrs;
+ result->messages_stats.recv_bytes = resp.u.messages_stats.recv_bytes;
+ result->messages_stats.recv_wrs = resp.u.messages_stats.recv_wrs;
+ break;
+ case EFA_ADMIN_GET_STATS_TYPE_RDMA_READ:
+ result->rdma_read_stats.read_wrs = resp.u.rdma_read_stats.read_wrs;
+ result->rdma_read_stats.read_bytes = resp.u.rdma_read_stats.read_bytes;
+ result->rdma_read_stats.read_wr_err = resp.u.rdma_read_stats.read_wr_err;
+ result->rdma_read_stats.read_resp_bytes = resp.u.rdma_read_stats.read_resp_bytes;
+ break;
+ }
return 0;
}
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h
index 190bac23f585..eea4ebfbe6ec 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.h
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.h
@@ -47,6 +47,7 @@ struct efa_com_modify_qp_params {
u32 qkey;
u32 sq_psn;
u8 sq_drained_async_notify;
+ u8 rnr_retry;
};
struct efa_com_query_qp_params {
@@ -58,6 +59,7 @@ struct efa_com_query_qp_result {
u32 qkey;
u32 sq_draining;
u32 sq_psn;
+ u8 rnr_retry;
};
struct efa_com_destroy_qp_params {
@@ -238,8 +240,24 @@ struct efa_com_basic_stats {
u64 rx_drops;
};
+struct efa_com_messages_stats {
+ u64 send_bytes;
+ u64 send_wrs;
+ u64 recv_bytes;
+ u64 recv_wrs;
+};
+
+struct efa_com_rdma_read_stats {
+ u64 read_wrs;
+ u64 read_bytes;
+ u64 read_wr_err;
+ u64 read_resp_bytes;
+};
+
union efa_com_get_stats_result {
struct efa_com_basic_stats basic_stats;
+ struct efa_com_messages_stats messages_stats;
+ struct efa_com_rdma_read_stats rdma_read_stats;
};
void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low);
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
index 92d701146320..6faed3a81e08 100644
--- a/drivers/infiniband/hw/efa/efa_main.c
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -331,7 +331,7 @@ static int efa_ib_device_add(struct efa_dev *dev)
ib_set_device_ops(&dev->ibdev, &efa_dev_ops);
- err = ib_register_device(&dev->ibdev, "efa_%d");
+ err = ib_register_device(&dev->ibdev, "efa_%d", &pdev->dev);
if (err)
goto err_release_doorbell_bar;
@@ -418,7 +418,7 @@ static int efa_device_init(struct efa_com_dev *edev, struct pci_dev *pdev)
err);
return err;
}
-
+ dma_set_max_seg_size(&pdev->dev, UINT_MAX);
return 0;
}
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index 9e201f169289..191e0843f090 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -4,6 +4,7 @@
*/
#include <linux/vmalloc.h>
+#include <linux/log2.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_umem.h>
@@ -35,6 +36,14 @@ struct efa_user_mmap_entry {
op(EFA_RX_BYTES, "rx_bytes") \
op(EFA_RX_PKTS, "rx_pkts") \
op(EFA_RX_DROPS, "rx_drops") \
+ op(EFA_SEND_BYTES, "send_bytes") \
+ op(EFA_SEND_WRS, "send_wrs") \
+ op(EFA_RECV_BYTES, "recv_bytes") \
+ op(EFA_RECV_WRS, "recv_wrs") \
+ op(EFA_RDMA_READ_WRS, "rdma_read_wrs") \
+ op(EFA_RDMA_READ_BYTES, "rdma_read_bytes") \
+ op(EFA_RDMA_READ_WR_ERR, "rdma_read_wr_err") \
+ op(EFA_RDMA_READ_RESP_BYTES, "rdma_read_resp_bytes") \
op(EFA_SUBMITTED_CMDS, "submitted_cmds") \
op(EFA_COMPLETED_CMDS, "completed_cmds") \
op(EFA_CMDS_ERR, "cmds_err") \
@@ -142,10 +151,9 @@ to_emmap(struct rdma_user_mmap_entry *rdma_entry)
return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry);
}
-static inline bool is_rdma_read_cap(struct efa_dev *dev)
-{
- return dev->dev_attr.device_caps & EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK;
-}
+#define EFA_DEV_CAP(dev, cap) \
+ ((dev)->dev_attr.device_caps & \
+ EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_##cap##_MASK)
#define is_reserved_cleared(reserved) \
!memchr_inv(reserved, 0, sizeof(reserved))
@@ -221,9 +229,12 @@ int efa_query_device(struct ib_device *ibdev,
resp.max_rq_wr = dev_attr->max_rq_depth;
resp.max_rdma_size = dev_attr->max_rdma_size;
- if (is_rdma_read_cap(dev))
+ if (EFA_DEV_CAP(dev, RDMA_READ))
resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ;
+ if (EFA_DEV_CAP(dev, RNR_RETRY))
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RNR_RETRY;
+
err = ib_copy_to_udata(udata, &resp,
min(sizeof(resp), udata->outlen));
if (err) {
@@ -269,7 +280,7 @@ int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
#define EFA_QUERY_QP_SUPP_MASK \
(IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \
- IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP)
+ IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP | IB_QP_RNR_RETRY)
if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) {
ibdev_dbg(&dev->ibdev,
@@ -291,6 +302,7 @@ int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
qp_attr->sq_psn = result.sq_psn;
qp_attr->sq_draining = result.sq_draining;
qp_attr->port_num = 1;
+ qp_attr->rnr_retry = result.rnr_retry;
qp_attr->cap.max_send_wr = qp->max_send_wr;
qp_attr->cap.max_recv_wr = qp->max_recv_wr;
@@ -376,17 +388,18 @@ int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
err_dealloc_pd:
efa_pd_dealloc(dev, result.pdn);
err_out:
- atomic64_inc(&dev->stats.sw_stats.alloc_pd_err);
+ atomic64_inc(&dev->stats.alloc_pd_err);
return err;
}
-void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct efa_dev *dev = to_edev(ibpd->device);
struct efa_pd *pd = to_epd(ibpd);
ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn);
efa_pd_dealloc(dev, pd->pdn);
+ return 0;
}
static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle)
@@ -737,18 +750,130 @@ err_free_mapped:
err_free_qp:
kfree(qp);
err_out:
- atomic64_inc(&dev->stats.sw_stats.create_qp_err);
+ atomic64_inc(&dev->stats.create_qp_err);
return ERR_PTR(err);
}
+static const struct {
+ int valid;
+ enum ib_qp_attr_mask req_param;
+ enum ib_qp_attr_mask opt_param;
+} srd_qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
+ [IB_QPS_RESET] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_INIT] = {
+ .valid = 1,
+ .req_param = IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_QKEY,
+ },
+ },
+ [IB_QPS_INIT] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_INIT] = {
+ .valid = 1,
+ .opt_param = IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_QKEY,
+ },
+ [IB_QPS_RTR] = {
+ .valid = 1,
+ .opt_param = IB_QP_PKEY_INDEX |
+ IB_QP_QKEY,
+ },
+ },
+ [IB_QPS_RTR] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_RTS] = {
+ .valid = 1,
+ .req_param = IB_QP_SQ_PSN,
+ .opt_param = IB_QP_CUR_STATE |
+ IB_QP_QKEY |
+ IB_QP_RNR_RETRY,
+
+ }
+ },
+ [IB_QPS_RTS] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_RTS] = {
+ .valid = 1,
+ .opt_param = IB_QP_CUR_STATE |
+ IB_QP_QKEY,
+ },
+ [IB_QPS_SQD] = {
+ .valid = 1,
+ .opt_param = IB_QP_EN_SQD_ASYNC_NOTIFY,
+ },
+ },
+ [IB_QPS_SQD] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_RTS] = {
+ .valid = 1,
+ .opt_param = IB_QP_CUR_STATE |
+ IB_QP_QKEY,
+ },
+ [IB_QPS_SQD] = {
+ .valid = 1,
+ .opt_param = IB_QP_PKEY_INDEX |
+ IB_QP_QKEY,
+ }
+ },
+ [IB_QPS_SQE] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_RTS] = {
+ .valid = 1,
+ .opt_param = IB_QP_CUR_STATE |
+ IB_QP_QKEY,
+ }
+ },
+ [IB_QPS_ERR] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ }
+};
+
+static bool efa_modify_srd_qp_is_ok(enum ib_qp_state cur_state,
+ enum ib_qp_state next_state,
+ enum ib_qp_attr_mask mask)
+{
+ enum ib_qp_attr_mask req_param, opt_param;
+
+ if (mask & IB_QP_CUR_STATE &&
+ cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
+ cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE)
+ return false;
+
+ if (!srd_qp_state_table[cur_state][next_state].valid)
+ return false;
+
+ req_param = srd_qp_state_table[cur_state][next_state].req_param;
+ opt_param = srd_qp_state_table[cur_state][next_state].opt_param;
+
+ if ((mask & req_param) != req_param)
+ return false;
+
+ if (mask & ~(req_param | opt_param | IB_QP_STATE))
+ return false;
+
+ return true;
+}
+
static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp,
struct ib_qp_attr *qp_attr, int qp_attr_mask,
enum ib_qp_state cur_state,
enum ib_qp_state new_state)
{
+ int err;
+
#define EFA_MODIFY_QP_SUPP_MASK \
(IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \
- IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN)
+ IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN | \
+ IB_QP_RNR_RETRY)
if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) {
ibdev_dbg(&dev->ibdev,
@@ -757,8 +882,14 @@ static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp,
return -EOPNOTSUPP;
}
- if (!ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD,
- qp_attr_mask)) {
+ if (qp->ibqp.qp_type == IB_QPT_DRIVER)
+ err = !efa_modify_srd_qp_is_ok(cur_state, new_state,
+ qp_attr_mask);
+ else
+ err = !ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD,
+ qp_attr_mask);
+
+ if (err) {
ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n");
return -EINVAL;
}
@@ -805,28 +936,36 @@ int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
params.qp_handle = qp->qp_handle;
if (qp_attr_mask & IB_QP_STATE) {
- params.modify_mask |= BIT(EFA_ADMIN_QP_STATE_BIT) |
- BIT(EFA_ADMIN_CUR_QP_STATE_BIT);
+ EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QP_STATE,
+ 1);
+ EFA_SET(&params.modify_mask,
+ EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE, 1);
params.cur_qp_state = qp_attr->cur_qp_state;
params.qp_state = qp_attr->qp_state;
}
if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
- params.modify_mask |=
- BIT(EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT);
+ EFA_SET(&params.modify_mask,
+ EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY, 1);
params.sq_drained_async_notify = qp_attr->en_sqd_async_notify;
}
if (qp_attr_mask & IB_QP_QKEY) {
- params.modify_mask |= BIT(EFA_ADMIN_QKEY_BIT);
+ EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QKEY, 1);
params.qkey = qp_attr->qkey;
}
if (qp_attr_mask & IB_QP_SQ_PSN) {
- params.modify_mask |= BIT(EFA_ADMIN_SQ_PSN_BIT);
+ EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN, 1);
params.sq_psn = qp_attr->sq_psn;
}
+ if (qp_attr_mask & IB_QP_RNR_RETRY) {
+ EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY,
+ 1);
+ params.rnr_retry = qp_attr->rnr_retry;
+ }
+
err = efa_com_modify_qp(&dev->edev, &params);
if (err)
return err;
@@ -843,7 +982,7 @@ static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx)
return efa_com_destroy_cq(&dev->edev, &params);
}
-void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct efa_dev *dev = to_edev(ibcq->device);
struct efa_cq *cq = to_ecq(ibcq);
@@ -856,6 +995,7 @@ void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
efa_destroy_cq_idx(dev, cq->cq_idx);
efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size,
DMA_FROM_DEVICE);
+ return 0;
}
static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
@@ -996,7 +1136,7 @@ err_free_mapped:
DMA_FROM_DEVICE);
err_out:
- atomic64_inc(&dev->stats.sw_stats.create_cq_err);
+ atomic64_inc(&dev->stats.create_cq_err);
return err;
}
@@ -1013,8 +1153,7 @@ static int umem_to_page_list(struct efa_dev *dev,
ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n",
hp_cnt, pages_in_hp);
- rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap,
- BIT(hp_shift))
+ rdma_umem_for_each_dma_block(umem, &biter, BIT(hp_shift))
page_list[hp_idx++] = rdma_block_iter_dma_address(&biter);
return 0;
@@ -1026,7 +1165,7 @@ static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt)
struct page *pg;
int i;
- sglist = kcalloc(page_cnt, sizeof(*sglist), GFP_KERNEL);
+ sglist = kmalloc_array(page_cnt, sizeof(*sglist), GFP_KERNEL);
if (!sglist)
return NULL;
sg_init_table(sglist, page_cnt);
@@ -1370,7 +1509,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
supp_access_flags =
IB_ACCESS_LOCAL_WRITE |
- (is_rdma_read_cap(dev) ? IB_ACCESS_REMOTE_READ : 0);
+ (EFA_DEV_CAP(dev, RDMA_READ) ? IB_ACCESS_REMOTE_READ : 0);
access_flags &= ~IB_ACCESS_OPTIONAL;
if (access_flags & ~supp_access_flags) {
@@ -1410,9 +1549,8 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
goto err_unmap;
}
- params.page_shift = __ffs(pg_sz);
- params.page_num = DIV_ROUND_UP(length + (start & (pg_sz - 1)),
- pg_sz);
+ params.page_shift = order_base_2(pg_sz);
+ params.page_num = ib_umem_num_dma_blocks(mr->umem, pg_sz);
ibdev_dbg(&dev->ibdev,
"start %#llx length %#llx params.page_shift %u params.page_num %u\n",
@@ -1451,7 +1589,7 @@ err_unmap:
err_free:
kfree(mr);
err_out:
- atomic64_inc(&dev->stats.sw_stats.reg_mr_err);
+ atomic64_inc(&dev->stats.reg_mr_err);
return ERR_PTR(err);
}
@@ -1569,19 +1707,17 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
resp.max_tx_batch = dev->dev_attr.max_tx_batch;
resp.min_sq_wr = dev->dev_attr.min_sq_depth;
- if (udata && udata->outlen) {
- err = ib_copy_to_udata(udata, &resp,
- min(sizeof(resp), udata->outlen));
- if (err)
- goto err_dealloc_uar;
- }
+ err = ib_copy_to_udata(udata, &resp,
+ min(sizeof(resp), udata->outlen));
+ if (err)
+ goto err_dealloc_uar;
return 0;
err_dealloc_uar:
efa_dealloc_uar(dev, result.uarn);
err_out:
- atomic64_inc(&dev->stats.sw_stats.alloc_ucontext_err);
+ atomic64_inc(&dev->stats.alloc_ucontext_err);
return err;
}
@@ -1614,7 +1750,7 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
ibdev_dbg(&dev->ibdev,
"pgoff[%#lx] does not have valid entry\n",
vma->vm_pgoff);
- atomic64_inc(&dev->stats.sw_stats.mmap_err);
+ atomic64_inc(&dev->stats.mmap_err);
return -EINVAL;
}
entry = to_emmap(rdma_entry);
@@ -1656,7 +1792,7 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
"Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
entry->address, rdma_entry->npages * PAGE_SIZE,
entry->mmap_flag, err);
- atomic64_inc(&dev->stats.sw_stats.mmap_err);
+ atomic64_inc(&dev->stats.mmap_err);
}
rdma_user_mmap_entry_put(rdma_entry);
@@ -1741,11 +1877,11 @@ int efa_create_ah(struct ib_ah *ibah,
err_destroy_ah:
efa_ah_destroy(dev, ah);
err_out:
- atomic64_inc(&dev->stats.sw_stats.create_ah_err);
+ atomic64_inc(&dev->stats.create_ah_err);
return err;
}
-void efa_destroy_ah(struct ib_ah *ibah, u32 flags)
+int efa_destroy_ah(struct ib_ah *ibah, u32 flags)
{
struct efa_dev *dev = to_edev(ibah->pd->device);
struct efa_ah *ah = to_eah(ibah);
@@ -1755,10 +1891,11 @@ void efa_destroy_ah(struct ib_ah *ibah, u32 flags)
if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) {
ibdev_dbg(&dev->ibdev,
"Destroy address handle is not supported in atomic context\n");
- return;
+ return -EOPNOTSUPP;
}
efa_ah_destroy(dev, ah);
+ return 0;
}
struct rdma_hw_stats *efa_alloc_hw_stats(struct ib_device *ibdev, u8 port_num)
@@ -1774,13 +1911,15 @@ int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
struct efa_com_get_stats_params params = {};
union efa_com_get_stats_result result;
struct efa_dev *dev = to_edev(ibdev);
+ struct efa_com_rdma_read_stats *rrs;
+ struct efa_com_messages_stats *ms;
struct efa_com_basic_stats *bs;
struct efa_com_stats_admin *as;
struct efa_stats *s;
int err;
- params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC;
params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL;
+ params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC;
err = efa_com_get_stats(&dev->edev, &params, &result);
if (err)
@@ -1793,6 +1932,28 @@ int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
stats->value[EFA_RX_PKTS] = bs->rx_pkts;
stats->value[EFA_RX_DROPS] = bs->rx_drops;
+ params.type = EFA_ADMIN_GET_STATS_TYPE_MESSAGES;
+ err = efa_com_get_stats(&dev->edev, &params, &result);
+ if (err)
+ return err;
+
+ ms = &result.messages_stats;
+ stats->value[EFA_SEND_BYTES] = ms->send_bytes;
+ stats->value[EFA_SEND_WRS] = ms->send_wrs;
+ stats->value[EFA_RECV_BYTES] = ms->recv_bytes;
+ stats->value[EFA_RECV_WRS] = ms->recv_wrs;
+
+ params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_READ;
+ err = efa_com_get_stats(&dev->edev, &params, &result);
+ if (err)
+ return err;
+
+ rrs = &result.rdma_read_stats;
+ stats->value[EFA_RDMA_READ_WRS] = rrs->read_wrs;
+ stats->value[EFA_RDMA_READ_BYTES] = rrs->read_bytes;
+ stats->value[EFA_RDMA_READ_WR_ERR] = rrs->read_wr_err;
+ stats->value[EFA_RDMA_READ_RESP_BYTES] = rrs->read_resp_bytes;
+
as = &dev->edev.aq.stats;
stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd);
stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd);
@@ -1801,13 +1962,14 @@ int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
s = &dev->stats;
stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd);
- stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->sw_stats.alloc_pd_err);
- stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->sw_stats.create_qp_err);
- stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->sw_stats.create_cq_err);
- stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->sw_stats.reg_mr_err);
- stats->value[EFA_ALLOC_UCONTEXT_ERR] = atomic64_read(&s->sw_stats.alloc_ucontext_err);
- stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->sw_stats.create_ah_err);
- stats->value[EFA_MMAP_ERR] = atomic64_read(&s->sw_stats.mmap_err);
+ stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->alloc_pd_err);
+ stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->create_qp_err);
+ stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->create_cq_err);
+ stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->reg_mr_err);
+ stats->value[EFA_ALLOC_UCONTEXT_ERR] =
+ atomic64_read(&s->alloc_ucontext_err);
+ stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->create_ah_err);
+ stats->value[EFA_MMAP_ERR] = atomic64_read(&s->mmap_err);
return ARRAY_SIZE(efa_stats_names);
}
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 04575c9afd61..a307d4c8b15a 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -232,11 +232,11 @@ static const struct sdma_set_state_action sdma_action_table[] = {
static void sdma_complete(struct kref *);
static void sdma_finalput(struct sdma_state *);
static void sdma_get(struct sdma_state *);
-static void sdma_hw_clean_up_task(unsigned long);
+static void sdma_hw_clean_up_task(struct tasklet_struct *);
static void sdma_put(struct sdma_state *);
static void sdma_set_state(struct sdma_engine *, enum sdma_states);
static void sdma_start_hw_clean_up(struct sdma_engine *);
-static void sdma_sw_clean_up_task(unsigned long);
+static void sdma_sw_clean_up_task(struct tasklet_struct *);
static void sdma_sendctrl(struct sdma_engine *, unsigned);
static void init_sdma_regs(struct sdma_engine *, u32, uint);
static void sdma_process_event(
@@ -545,9 +545,10 @@ static void sdma_err_progress_check(struct timer_list *t)
schedule_work(&sde->err_halt_worker);
}
-static void sdma_hw_clean_up_task(unsigned long opaque)
+static void sdma_hw_clean_up_task(struct tasklet_struct *t)
{
- struct sdma_engine *sde = (struct sdma_engine *)opaque;
+ struct sdma_engine *sde = from_tasklet(sde, t,
+ sdma_hw_clean_up_task);
u64 statuscsr;
while (1) {
@@ -604,9 +605,9 @@ static void sdma_flush_descq(struct sdma_engine *sde)
sdma_desc_avail(sde, sdma_descq_freecnt(sde));
}
-static void sdma_sw_clean_up_task(unsigned long opaque)
+static void sdma_sw_clean_up_task(struct tasklet_struct *t)
{
- struct sdma_engine *sde = (struct sdma_engine *)opaque;
+ struct sdma_engine *sde = from_tasklet(sde, t, sdma_sw_clean_up_task);
unsigned long flags;
spin_lock_irqsave(&sde->tail_lock, flags);
@@ -1454,11 +1455,10 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
sde->tail_csr =
get_kctxt_csr_addr(dd, this_idx, SD(TAIL));
- tasklet_init(&sde->sdma_hw_clean_up_task, sdma_hw_clean_up_task,
- (unsigned long)sde);
-
- tasklet_init(&sde->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
- (unsigned long)sde);
+ tasklet_setup(&sde->sdma_hw_clean_up_task,
+ sdma_hw_clean_up_task);
+ tasklet_setup(&sde->sdma_sw_clean_up_task,
+ sdma_sw_clean_up_task);
INIT_WORK(&sde->err_halt_worker, sdma_err_halt_wait);
INIT_WORK(&sde->flush_worker, sdma_field_flush);
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 30865635b449..3591923abebb 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1424,7 +1424,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
props->gid_tbl_len = HFI1_GUIDS_PER_PORT;
props->active_width = (u8)opa_width_to_ib(ppd->link_width_active);
/* see rate_show() in ib core/sysfs.c */
- props->active_speed = (u8)opa_speed_to_ib(ppd->link_speed_active);
+ props->active_speed = opa_speed_to_ib(ppd->link_speed_active);
props->max_vl_num = ppd->vls_supported;
/* Once we are a "first class" citizen and have added the OPA MTUs to
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index 5b2f9314edd3..75b06db60f7c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -39,6 +39,22 @@
#define HNS_ROCE_VLAN_SL_BIT_MASK 7
#define HNS_ROCE_VLAN_SL_SHIFT 13
+static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr)
+{
+ u32 fl = ah_attr->grh.flow_label;
+ u16 sport;
+
+ if (!fl)
+ sport = get_random_u32() %
+ (IB_ROCE_UDP_ENCAP_VALID_PORT_MAX + 1 -
+ IB_ROCE_UDP_ENCAP_VALID_PORT_MIN) +
+ IB_ROCE_UDP_ENCAP_VALID_PORT_MIN;
+ else
+ sport = rdma_flow_label_to_udp_sport(fl);
+
+ return sport;
+}
+
int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata)
{
@@ -79,6 +95,8 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE);
ah->av.sl = rdma_ah_get_sl(ah_attr);
+ ah->av.flowlabel = grh->flow_label;
+ ah->av.udp_sport = get_ah_udp_sport(ah_attr);
return 0;
}
@@ -98,8 +116,3 @@ int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
return 0;
}
-
-void hns_roce_destroy_ah(struct ib_ah *ah, u32 flags)
-{
- return;
-}
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index a522cb2d29ea..a6b23dec1adc 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -268,8 +268,7 @@ int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
}
/* convert system page cnt to hw page cnt */
- rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap,
- 1 << page_shift) {
+ rdma_umem_for_each_dma_block(umem, &biter, 1 << page_shift) {
addr = rdma_block_iter_dma_address(&biter);
if (idx >= start) {
bufs[total++] = addr;
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index e87d616f7988..809b22aa5056 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -150,7 +150,7 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
int err;
buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + HNS_HW_PAGE_SHIFT;
- buf_attr.region[0].size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz;
+ buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size;
buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num;
buf_attr.region_count = 1;
buf_attr.fixed_page = true;
@@ -224,6 +224,21 @@ static void free_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
}
}
+static void set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata,
+ struct hns_roce_ib_create_cq *ucmd)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
+
+ if (udata) {
+ if (udata->inlen >= offsetofend(typeof(*ucmd), cqe_size))
+ hr_cq->cqe_size = ucmd->cqe_size;
+ else
+ hr_cq->cqe_size = HNS_ROCE_V2_CQE_SIZE;
+ } else {
+ hr_cq->cqe_size = hr_dev->caps.cqe_sz;
+ }
+}
+
int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata)
{
@@ -258,7 +273,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
INIT_LIST_HEAD(&hr_cq->rq_list);
if (udata) {
- ret = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
+ ret = ib_copy_from_udata(&ucmd, udata,
+ min(sizeof(ucmd), udata->inlen));
if (ret) {
ibdev_err(ibdev, "Failed to copy CQ udata, err %d\n",
ret);
@@ -266,6 +282,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
}
}
+ set_cqe_size(hr_cq, udata, &ucmd);
+
ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr);
if (ret) {
ibdev_err(ibdev, "Failed to alloc CQ buf, err %d\n", ret);
@@ -287,7 +305,7 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
/*
* For the QP created by kernel space, tptr value should be initialized
* to zero; For the QP created by user space, it will cause synchronous
- * problems if tptr is set to zero here, so we initialze it in user
+ * problems if tptr is set to zero here, so we initialize it in user
* space.
*/
if (!udata && hr_cq->tptr_addr)
@@ -311,7 +329,7 @@ err_cq_buf:
return ret;
}
-void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
+int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
@@ -322,6 +340,7 @@ void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
free_cq_buf(hr_dev, hr_cq);
free_cq_db(hr_dev, hr_cq, udata);
free_cqc(hr_dev, hr_cq);
+ return 0;
}
void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 6edcbdcd8f43..6d2acff69f98 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -37,8 +37,8 @@
#define DRV_NAME "hns_roce"
-/* hip08 is a pci device */
#define PCI_REVISION_ID_HIP08 0x21
+#define PCI_REVISION_ID_HIP09 0x30
#define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6')
@@ -57,7 +57,6 @@
/* Hardware specification only for v1 engine */
#define HNS_ROCE_MAX_INNER_MTPT_NUM 0x7
#define HNS_ROCE_MAX_MTPT_PBL_NUM 0x100000
-#define HNS_ROCE_MAX_SGE_NUM 2
#define HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS 20
#define HNS_ROCE_MAX_FREE_CQ_WAIT_CNT \
@@ -76,15 +75,18 @@
#define HNS_ROCE_CEQ 0
#define HNS_ROCE_AEQ 1
-#define HNS_ROCE_CEQ_ENTRY_SIZE 0x4
-#define HNS_ROCE_AEQ_ENTRY_SIZE 0x10
+#define HNS_ROCE_CEQE_SIZE 0x4
+#define HNS_ROCE_AEQE_SIZE 0x10
-#define HNS_ROCE_SL_SHIFT 28
-#define HNS_ROCE_TCLASS_SHIFT 20
-#define HNS_ROCE_FLOW_LABEL_MASK 0xfffff
+#define HNS_ROCE_V3_EQE_SIZE 0x40
+
+#define HNS_ROCE_V2_CQE_SIZE 32
+#define HNS_ROCE_V3_CQE_SIZE 64
+
+#define HNS_ROCE_V2_QPC_SZ 256
+#define HNS_ROCE_V3_QPC_SZ 512
#define HNS_ROCE_MAX_PORTS 6
-#define HNS_ROCE_MAX_GID_NUM 16
#define HNS_ROCE_GID_SIZE 16
#define HNS_ROCE_SGE_SIZE 16
@@ -112,8 +114,6 @@
#define PAGES_SHIFT_24 24
#define PAGES_SHIFT_32 32
-#define HNS_ROCE_PCI_BAR_NUM 2
-
#define HNS_ROCE_IDX_QUE_ENTRY_SZ 4
#define SRQ_DB_REG 0x230
@@ -467,6 +467,7 @@ struct hns_roce_cq {
void __iomem *cq_db_l;
u16 *tptr_addr;
int arm_sn;
+ int cqe_size;
unsigned long cqn;
u32 vector;
atomic_t refcount;
@@ -535,17 +536,18 @@ struct hns_roce_raq_table {
};
struct hns_roce_av {
- u8 port;
- u8 gid_index;
- u8 stat_rate;
- u8 hop_limit;
- u32 flowlabel;
- u8 sl;
- u8 tclass;
- u8 dgid[HNS_ROCE_GID_SIZE];
- u8 mac[ETH_ALEN];
- u16 vlan_id;
- bool vlan_en;
+ u8 port;
+ u8 gid_index;
+ u8 stat_rate;
+ u8 hop_limit;
+ u32 flowlabel;
+ u16 udp_sport;
+ u8 sl;
+ u8 tclass;
+ u8 dgid[HNS_ROCE_GID_SIZE];
+ u8 mac[ETH_ALEN];
+ u16 vlan_id;
+ bool vlan_en;
};
struct hns_roce_ah {
@@ -655,6 +657,8 @@ struct hns_roce_qp {
struct hns_roce_sge sge;
u32 next_sge;
+ enum ib_mtu path_mtu;
+ u32 max_inline_data;
/* 0: flush needed, 1: unneeded */
unsigned long flush_flag;
@@ -678,7 +682,8 @@ enum {
};
struct hns_roce_ceqe {
- __le32 comp;
+ __le32 comp;
+ __le32 rsv[15];
};
struct hns_roce_aeqe {
@@ -715,6 +720,7 @@ struct hns_roce_aeqe {
u8 rsv0;
} __packed cmd;
} event;
+ __le32 rsv[12];
};
struct hns_roce_eq {
@@ -791,15 +797,15 @@ struct hns_roce_caps {
int num_pds;
int reserved_pds;
u32 mtt_entry_sz;
- u32 cq_entry_sz;
+ u32 cqe_sz;
u32 page_size_cap;
u32 reserved_lkey;
int mtpt_entry_sz;
- int qpc_entry_sz;
+ int qpc_sz;
int irrl_entry_sz;
int trrl_entry_sz;
int cqc_entry_sz;
- int sccc_entry_sz;
+ int sccc_sz;
int qpc_timer_entry_sz;
int cqc_timer_entry_sz;
int srqc_entry_sz;
@@ -809,6 +815,8 @@ struct hns_roce_caps {
u32 pbl_hop_num;
int aeqe_depth;
int ceqe_depth;
+ u32 aeqe_size;
+ u32 ceqe_size;
enum ib_mtu max_mtu;
u32 qpc_bt_num;
u32 qpc_timer_bt_num;
@@ -930,7 +938,7 @@ struct hns_roce_hw {
int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
struct ib_udata *udata);
- void (*destroy_cq)(struct ib_cq *ibcq, struct ib_udata *udata);
+ int (*destroy_cq)(struct ib_cq *ibcq, struct ib_udata *udata);
int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int (*init_eq)(struct hns_roce_dev *hr_dev);
void (*cleanup_eq)(struct hns_roce_dev *hr_dev);
@@ -1178,10 +1186,13 @@ void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap,
int hns_roce_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
-void hns_roce_destroy_ah(struct ib_ah *ah, u32 flags);
+static inline int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags)
+{
+ return 0;
+}
int hns_roce_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-void hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
+int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc);
struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
@@ -1200,8 +1211,7 @@ int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev,
unsigned long mpt_index);
unsigned long key_to_hw_index(u32 key);
-struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type,
- struct ib_udata *udata);
+int hns_roce_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
int hns_roce_dealloc_mw(struct ib_mw *ibmw);
void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf);
@@ -1220,7 +1230,7 @@ int hns_roce_create_srq(struct ib_srq *srq,
int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr,
enum ib_srq_attr_mask srq_attr_mask,
struct ib_udata *udata);
-void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
+int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd,
struct ib_qp_init_attr *init_attr,
@@ -1247,7 +1257,7 @@ int to_hr_qp_type(int qp_type);
int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
-void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
+int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
int hns_roce_db_map_user(struct hns_roce_ucontext *context,
struct ib_udata *udata, unsigned long virt,
struct hns_roce_db *db);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index c8db6f8ae018..7487cf3d2c37 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -338,8 +338,8 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev,
void __iomem *bt_cmd;
__le32 bt_cmd_val[2];
__le32 bt_cmd_h = 0;
- __le32 bt_cmd_l = 0;
- u64 bt_ba = 0;
+ __le32 bt_cmd_l;
+ u64 bt_ba;
int ret = 0;
/* Find the HEM(Hardware Entry Memory) entry */
@@ -1027,7 +1027,7 @@ void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev)
if (hr_dev->caps.cqc_timer_entry_sz)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->cqc_timer_table);
- if (hr_dev->caps.sccc_entry_sz)
+ if (hr_dev->caps.sccc_sz)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->qp_table.sccc_table);
if (hr_dev->caps.trrl_entry_sz)
@@ -1404,7 +1404,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
{
const struct hns_roce_buf_region *r;
int ofs, end;
- int ret = 0;
+ int ret;
int unit;
int i;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index aeb3a6fa7d47..5f4d8a32ed6d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -70,15 +70,15 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
struct hns_roce_qp *qp = to_hr_qp(ibqp);
struct device *dev = &hr_dev->pdev->dev;
struct hns_roce_sq_db sq_db = {};
- int ps_opcode = 0, i = 0;
+ int ps_opcode, i;
unsigned long flags = 0;
void *wqe = NULL;
__le32 doorbell[2];
- u32 wqe_idx = 0;
- int nreq = 0;
int ret = 0;
- u8 *smac;
int loopback;
+ u32 wqe_idx;
+ int nreq;
+ u8 *smac;
if (unlikely(ibqp->qp_type != IB_QPT_GSI &&
ibqp->qp_type != IB_QPT_RC)) {
@@ -271,7 +271,6 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
ps_opcode = HNS_ROCE_WQE_OPCODE_SEND;
break;
case IB_WR_LOCAL_INV:
- break;
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
case IB_WR_LSO:
@@ -888,7 +887,7 @@ static int hns_roce_db_init(struct hns_roce_dev *hr_dev)
u32 odb_ext_mod;
u32 sdb_evt_mod;
u32 odb_evt_mod;
- int ret = 0;
+ int ret;
memset(db, 0, sizeof(*db));
@@ -1148,8 +1147,8 @@ static int hns_roce_raq_init(struct hns_roce_dev *hr_dev)
struct hns_roce_v1_priv *priv = hr_dev->priv;
struct hns_roce_raq_table *raq = &priv->raq_table;
struct device *dev = &hr_dev->pdev->dev;
- int raq_shift = 0;
dma_addr_t addr;
+ int raq_shift;
__le32 tmp;
u32 val;
int ret;
@@ -1360,7 +1359,7 @@ static int hns_roce_free_mr_init(struct hns_roce_dev *hr_dev)
struct hns_roce_v1_priv *priv = hr_dev->priv;
struct hns_roce_free_mr *free_mr = &priv->free_mr;
struct device *dev = &hr_dev->pdev->dev;
- int ret = 0;
+ int ret;
free_mr->free_mr_wq = create_singlethread_workqueue("hns_roce_free_mr");
if (!free_mr->free_mr_wq) {
@@ -1440,8 +1439,8 @@ static int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset)
static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
{
- int i = 0;
struct hns_roce_caps *caps = &hr_dev->caps;
+ int i;
hr_dev->vendor_id = roce_read(hr_dev, ROCEE_VENDOR_ID_REG);
hr_dev->vendor_part_id = roce_read(hr_dev, ROCEE_VENDOR_PART_ID_REG);
@@ -1471,12 +1470,12 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
caps->max_qp_dest_rdma = HNS_ROCE_V1_MAX_QP_DEST_RDMA;
caps->max_sq_desc_sz = HNS_ROCE_V1_MAX_SQ_DESC_SZ;
caps->max_rq_desc_sz = HNS_ROCE_V1_MAX_RQ_DESC_SZ;
- caps->qpc_entry_sz = HNS_ROCE_V1_QPC_ENTRY_SIZE;
+ caps->qpc_sz = HNS_ROCE_V1_QPC_SIZE;
caps->irrl_entry_sz = HNS_ROCE_V1_IRRL_ENTRY_SIZE;
caps->cqc_entry_sz = HNS_ROCE_V1_CQC_ENTRY_SIZE;
caps->mtpt_entry_sz = HNS_ROCE_V1_MTPT_ENTRY_SIZE;
caps->mtt_entry_sz = HNS_ROCE_V1_MTT_ENTRY_SIZE;
- caps->cq_entry_sz = HNS_ROCE_V1_CQE_ENTRY_SIZE;
+ caps->cqe_sz = HNS_ROCE_V1_CQE_SIZE;
caps->page_size_cap = HNS_ROCE_V1_PAGE_SIZE_SUPPORT;
caps->reserved_lkey = 0;
caps->reserved_pds = 0;
@@ -1643,7 +1642,7 @@ static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev,
unsigned long timeout)
{
u8 __iomem *hcr = hr_dev->reg_base + ROCEE_MB1_REG;
- unsigned long end = 0;
+ unsigned long end;
u32 status = 0;
end = msecs_to_jiffies(timeout) + jiffies;
@@ -1671,7 +1670,7 @@ static int hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port,
{
unsigned long flags;
u32 *p = NULL;
- u8 gid_idx = 0;
+ u8 gid_idx;
gid_idx = hns_get_gid_index(hr_dev, port, gid_index);
@@ -1897,8 +1896,7 @@ static int hns_roce_v1_write_mtpt(struct hns_roce_dev *hr_dev, void *mb_buf,
static void *get_cqe(struct hns_roce_cq *hr_cq, int n)
{
- return hns_roce_buf_offset(hr_cq->mtr.kmem,
- n * HNS_ROCE_V1_CQE_ENTRY_SIZE);
+ return hns_roce_buf_offset(hr_cq->mtr.kmem, n * HNS_ROCE_V1_CQE_SIZE);
}
static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n)
@@ -2445,7 +2443,7 @@ static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev,
struct hns_roce_cmd_mailbox *mailbox;
struct device *dev = &hr_dev->pdev->dev;
- int ret = 0;
+ int ret;
if (cur_state >= HNS_ROCE_QP_NUM_STATE ||
new_state >= HNS_ROCE_QP_NUM_STATE ||
@@ -3394,7 +3392,7 @@ static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
struct device *dev = &hr_dev->pdev->dev;
struct hns_roce_qp_context *context;
- int tmp_qp_state = 0;
+ int tmp_qp_state;
int ret = 0;
int state;
@@ -3572,7 +3570,7 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
return 0;
}
-static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device);
struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
@@ -3603,6 +3601,7 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
}
wait_time++;
}
+ return 0;
}
static void set_eq_cons_index_v1(struct hns_roce_eq *eq, int req_not)
@@ -3775,8 +3774,7 @@ static void hns_roce_v1_db_overflow_handle(struct hns_roce_dev *hr_dev,
static struct hns_roce_aeqe *get_aeqe_v1(struct hns_roce_eq *eq, u32 entry)
{
- unsigned long off = (entry & (eq->entries - 1)) *
- HNS_ROCE_AEQ_ENTRY_SIZE;
+ unsigned long off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQE_SIZE;
return (struct hns_roce_aeqe *)((u8 *)
(eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) +
@@ -3881,8 +3879,7 @@ static int hns_roce_v1_aeq_int(struct hns_roce_dev *hr_dev,
static struct hns_roce_ceqe *get_ceqe_v1(struct hns_roce_eq *eq, u32 entry)
{
- unsigned long off = (entry & (eq->entries - 1)) *
- HNS_ROCE_CEQ_ENTRY_SIZE;
+ unsigned long off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQE_SIZE;
return (struct hns_roce_ceqe *)((u8 *)
(eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) +
@@ -3934,7 +3931,7 @@ static irqreturn_t hns_roce_v1_msix_interrupt_eq(int irq, void *eq_ptr)
{
struct hns_roce_eq *eq = eq_ptr;
struct hns_roce_dev *hr_dev = eq->hr_dev;
- int int_work = 0;
+ int int_work;
if (eq->type_flag == HNS_ROCE_CEQ)
/* CEQ irq routine, CEQ is pulse irq, not clear */
@@ -4132,9 +4129,9 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev,
void __iomem *eqc = hr_dev->eq_table.eqc_base[eq->eqn];
struct device *dev = &hr_dev->pdev->dev;
dma_addr_t tmp_dma_addr;
- u32 eqconsindx_val = 0;
u32 eqcuridx_val = 0;
- u32 eqshift_val = 0;
+ u32 eqconsindx_val;
+ u32 eqshift_val;
__le32 tmp2 = 0;
__le32 tmp1 = 0;
__le32 tmp = 0;
@@ -4253,7 +4250,7 @@ static int hns_roce_v1_init_eq_table(struct hns_roce_dev *hr_dev)
CEQ_REG_OFFSET * i;
eq->entries = hr_dev->caps.ceqe_depth;
eq->log_entries = ilog2(eq->entries);
- eq->eqe_size = HNS_ROCE_CEQ_ENTRY_SIZE;
+ eq->eqe_size = HNS_ROCE_CEQE_SIZE;
} else {
/* AEQ */
eq_table->eqc_base[i] = hr_dev->reg_base +
@@ -4263,7 +4260,7 @@ static int hns_roce_v1_init_eq_table(struct hns_roce_dev *hr_dev)
ROCEE_CAEP_AEQE_CONS_IDX_REG;
eq->entries = hr_dev->caps.aeqe_depth;
eq->log_entries = ilog2(eq->entries);
- eq->eqe_size = HNS_ROCE_AEQ_ENTRY_SIZE;
+ eq->eqe_size = HNS_ROCE_AEQE_SIZE;
}
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index 52307b2c7100..ffd0156080f5 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
@@ -68,13 +68,13 @@
#define HNS_ROCE_V1_COMP_EQE_NUM 0x8000
#define HNS_ROCE_V1_ASYNC_EQE_NUM 0x400
-#define HNS_ROCE_V1_QPC_ENTRY_SIZE 256
+#define HNS_ROCE_V1_QPC_SIZE 256
#define HNS_ROCE_V1_IRRL_ENTRY_SIZE 8
#define HNS_ROCE_V1_CQC_ENTRY_SIZE 64
#define HNS_ROCE_V1_MTPT_ENTRY_SIZE 64
#define HNS_ROCE_V1_MTT_ENTRY_SIZE 64
-#define HNS_ROCE_V1_CQE_ENTRY_SIZE 32
+#define HNS_ROCE_V1_CQE_SIZE 32
#define HNS_ROCE_V1_PAGE_SIZE_SUPPORT 0xFFFFF000
#define HNS_ROCE_V1_TABLE_CHUNK_SIZE (1 << 17)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 4cda95ed1fbe..6d30850696c5 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -153,6 +153,67 @@ static void set_atomic_seg(const struct ib_send_wr *wr,
V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
}
+static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
+ const struct ib_send_wr *wr,
+ unsigned int *sge_idx, u32 msg_len)
+{
+ struct ib_device *ibdev = &(to_hr_dev(qp->ibqp.device))->ib_dev;
+ unsigned int dseg_len = sizeof(struct hns_roce_v2_wqe_data_seg);
+ unsigned int ext_sge_sz = qp->sq.max_gs * dseg_len;
+ unsigned int left_len_in_pg;
+ unsigned int idx = *sge_idx;
+ unsigned int i = 0;
+ unsigned int len;
+ void *addr;
+ void *dseg;
+
+ if (msg_len > ext_sge_sz) {
+ ibdev_err(ibdev,
+ "no enough extended sge space for inline data.\n");
+ return -EINVAL;
+ }
+
+ dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1));
+ left_len_in_pg = hr_hw_page_align((uintptr_t)dseg) - (uintptr_t)dseg;
+ len = wr->sg_list[0].length;
+ addr = (void *)(unsigned long)(wr->sg_list[0].addr);
+
+ /* When copying data to extended sge space, the left length in page may
+ * not long enough for current user's sge. So the data should be
+ * splited into several parts, one in the first page, and the others in
+ * the subsequent pages.
+ */
+ while (1) {
+ if (len <= left_len_in_pg) {
+ memcpy(dseg, addr, len);
+
+ idx += len / dseg_len;
+
+ i++;
+ if (i >= wr->num_sge)
+ break;
+
+ left_len_in_pg -= len;
+ len = wr->sg_list[i].length;
+ addr = (void *)(unsigned long)(wr->sg_list[i].addr);
+ dseg += len;
+ } else {
+ memcpy(dseg, addr, left_len_in_pg);
+
+ len -= left_len_in_pg;
+ addr += left_len_in_pg;
+ idx += left_len_in_pg / dseg_len;
+ dseg = hns_roce_get_extend_sge(qp,
+ idx & (qp->sge.sge_cnt - 1));
+ left_len_in_pg = 1 << HNS_HW_PAGE_SHIFT;
+ }
+ }
+
+ *sge_idx = idx;
+
+ return 0;
+}
+
static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
unsigned int *sge_ind, unsigned int valid_num_sge)
{
@@ -177,73 +238,115 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
*sge_ind = idx;
}
+static bool check_inl_data_len(struct hns_roce_qp *qp, unsigned int len)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device);
+ int mtu = ib_mtu_enum_to_int(qp->path_mtu);
+
+ if (len > qp->max_inline_data || len > mtu) {
+ ibdev_err(&hr_dev->ib_dev,
+ "invalid length of data, data len = %u, max inline len = %u, path mtu = %d.\n",
+ len, qp->max_inline_data, mtu);
+ return false;
+ }
+
+ return true;
+}
+
+static int set_rc_inl(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+ unsigned int *sge_idx)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device);
+ u32 msg_len = le32_to_cpu(rc_sq_wqe->msg_len);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ unsigned int curr_idx = *sge_idx;
+ void *dseg = rc_sq_wqe;
+ unsigned int i;
+ int ret;
+
+ if (unlikely(wr->opcode == IB_WR_RDMA_READ)) {
+ ibdev_err(ibdev, "invalid inline parameters!\n");
+ return -EINVAL;
+ }
+
+ if (!check_inl_data_len(qp, msg_len))
+ return -EINVAL;
+
+ dseg += sizeof(struct hns_roce_v2_rc_send_wqe);
+
+ roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S, 1);
+
+ if (msg_len <= HNS_ROCE_V2_MAX_RC_INL_INN_SZ) {
+ roce_set_bit(rc_sq_wqe->byte_20,
+ V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 0);
+
+ for (i = 0; i < wr->num_sge; i++) {
+ memcpy(dseg, ((void *)wr->sg_list[i].addr),
+ wr->sg_list[i].length);
+ dseg += wr->sg_list[i].length;
+ }
+ } else {
+ roce_set_bit(rc_sq_wqe->byte_20,
+ V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 1);
+
+ ret = fill_ext_sge_inl_data(qp, wr, &curr_idx, msg_len);
+ if (ret)
+ return ret;
+
+ roce_set_field(rc_sq_wqe->byte_16,
+ V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
+ V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
+ curr_idx - *sge_idx);
+ }
+
+ *sge_idx = curr_idx;
+
+ return 0;
+}
+
static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
unsigned int *sge_ind,
unsigned int valid_num_sge)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_v2_wqe_data_seg *dseg =
(void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe);
- struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_qp *qp = to_hr_qp(ibqp);
- void *wqe = dseg;
int j = 0;
int i;
- if (wr->send_flags & IB_SEND_INLINE && valid_num_sge) {
- if (unlikely(le32_to_cpu(rc_sq_wqe->msg_len) >
- hr_dev->caps.max_sq_inline)) {
- ibdev_err(ibdev, "inline len(1-%d)=%d, illegal",
- rc_sq_wqe->msg_len,
- hr_dev->caps.max_sq_inline);
- return -EINVAL;
- }
+ roce_set_field(rc_sq_wqe->byte_20,
+ V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
+ V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
+ (*sge_ind) & (qp->sge.sge_cnt - 1));
- if (unlikely(wr->opcode == IB_WR_RDMA_READ)) {
- ibdev_err(ibdev, "Not support inline data!\n");
- return -EINVAL;
- }
+ if (wr->send_flags & IB_SEND_INLINE)
+ return set_rc_inl(qp, wr, rc_sq_wqe, sge_ind);
+ if (valid_num_sge <= HNS_ROCE_SGE_IN_WQE) {
for (i = 0; i < wr->num_sge; i++) {
- memcpy(wqe, ((void *)wr->sg_list[i].addr),
- wr->sg_list[i].length);
- wqe += wr->sg_list[i].length;
+ if (likely(wr->sg_list[i].length)) {
+ set_data_seg_v2(dseg, wr->sg_list + i);
+ dseg++;
+ }
}
-
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S,
- 1);
} else {
- if (valid_num_sge <= HNS_ROCE_SGE_IN_WQE) {
- for (i = 0; i < wr->num_sge; i++) {
- if (likely(wr->sg_list[i].length)) {
- set_data_seg_v2(dseg, wr->sg_list + i);
- dseg++;
- }
+ for (i = 0; i < wr->num_sge && j < HNS_ROCE_SGE_IN_WQE; i++) {
+ if (likely(wr->sg_list[i].length)) {
+ set_data_seg_v2(dseg, wr->sg_list + i);
+ dseg++;
+ j++;
}
- } else {
- roce_set_field(rc_sq_wqe->byte_20,
- V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
- V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
- (*sge_ind) & (qp->sge.sge_cnt - 1));
-
- for (i = 0; i < wr->num_sge && j < HNS_ROCE_SGE_IN_WQE;
- i++) {
- if (likely(wr->sg_list[i].length)) {
- set_data_seg_v2(dseg, wr->sg_list + i);
- dseg++;
- j++;
- }
- }
-
- set_extend_sge(qp, wr, sge_ind, valid_num_sge);
}
- roce_set_field(rc_sq_wqe->byte_16,
- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
+ set_extend_sge(qp, wr, sge_ind, valid_num_sge);
}
+ roce_set_field(rc_sq_wqe->byte_16,
+ V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
+ V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
+
return 0;
}
@@ -292,6 +395,33 @@ static unsigned int calc_wr_sge_num(const struct ib_send_wr *wr,
return valid_num;
}
+static __le32 get_immtdata(const struct ib_send_wr *wr)
+{
+ switch (wr->opcode) {
+ case IB_WR_SEND_WITH_IMM:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ return cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
+ default:
+ return 0;
+ }
+}
+
+static int set_ud_opcode(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
+ const struct ib_send_wr *wr)
+{
+ u32 ib_op = wr->opcode;
+
+ if (ib_op != IB_WR_SEND && ib_op != IB_WR_SEND_WITH_IMM)
+ return -EINVAL;
+
+ ud_sq_wqe->immtdata = get_immtdata(wr);
+
+ roce_set_field(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OPCODE_M,
+ V2_UD_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op));
+
+ return 0;
+}
+
static inline int set_ud_wqe(struct hns_roce_qp *qp,
const struct ib_send_wr *wr,
void *wqe, unsigned int *sge_idx,
@@ -305,10 +435,15 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
u32 msg_len = 0;
bool loopback;
u8 *smac;
+ int ret;
valid_num_sge = calc_wr_sge_num(wr, &msg_len);
memset(ud_sq_wqe, 0, sizeof(*ud_sq_wqe));
+ ret = set_ud_opcode(ud_sq_wqe, wr);
+ if (WARN_ON(ret))
+ return ret;
+
roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_0_M,
V2_UD_SEND_WQE_DMAC_0_S, ah->av.mac[0]);
roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_1_M,
@@ -329,23 +464,8 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
roce_set_bit(ud_sq_wqe->byte_40,
V2_UD_SEND_WQE_BYTE_40_LBI_S, loopback);
- roce_set_field(ud_sq_wqe->byte_4,
- V2_UD_SEND_WQE_BYTE_4_OPCODE_M,
- V2_UD_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_SEND);
-
ud_sq_wqe->msg_len = cpu_to_le32(msg_len);
- switch (wr->opcode) {
- case IB_WR_SEND_WITH_IMM:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- ud_sq_wqe->immtdata = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
- break;
- default:
- ud_sq_wqe->immtdata = 0;
- break;
- }
-
/* Set sig attr */
roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_CQE_S,
(wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
@@ -369,7 +489,7 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
curr_idx & (qp->sge.sge_cnt - 1));
roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M,
- V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, 0);
+ V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport);
ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ?
qp->qkey : ud_wr(wr)->remote_qkey);
roce_set_field(ud_sq_wqe->byte_32, V2_UD_SEND_WQE_BYTE_32_DQPN_M,
@@ -402,6 +522,46 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
return 0;
}
+static int set_rc_opcode(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+ const struct ib_send_wr *wr)
+{
+ u32 ib_op = wr->opcode;
+
+ rc_sq_wqe->immtdata = get_immtdata(wr);
+
+ switch (ib_op) {
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey);
+ rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr);
+ break;
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ break;
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ rc_sq_wqe->rkey = cpu_to_le32(atomic_wr(wr)->rkey);
+ rc_sq_wqe->va = cpu_to_le64(atomic_wr(wr)->remote_addr);
+ break;
+ case IB_WR_REG_MR:
+ set_frmr_seg(rc_sq_wqe, reg_wr(wr));
+ break;
+ case IB_WR_LOCAL_INV:
+ roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1);
+ fallthrough;
+ case IB_WR_SEND_WITH_INV:
+ rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
+ V2_RC_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op));
+
+ return 0;
+}
static inline int set_rc_wqe(struct hns_roce_qp *qp,
const struct ib_send_wr *wr,
void *wqe, unsigned int *sge_idx,
@@ -411,25 +571,16 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
unsigned int curr_idx = *sge_idx;
unsigned int valid_num_sge;
u32 msg_len = 0;
- int ret = 0;
+ int ret;
valid_num_sge = calc_wr_sge_num(wr, &msg_len);
memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe));
rc_sq_wqe->msg_len = cpu_to_le32(msg_len);
- switch (wr->opcode) {
- case IB_WR_SEND_WITH_IMM:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- rc_sq_wqe->immtdata = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
- break;
- case IB_WR_SEND_WITH_INV:
- rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
- break;
- default:
- rc_sq_wqe->immtdata = 0;
- break;
- }
+ ret = set_rc_opcode(rc_sq_wqe, wr);
+ if (WARN_ON(ret))
+ return ret;
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FENCE_S,
(wr->send_flags & IB_SEND_FENCE) ? 1 : 0);
@@ -443,33 +594,6 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S,
owner_bit);
- switch (wr->opcode) {
- case IB_WR_RDMA_READ:
- case IB_WR_RDMA_WRITE:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey);
- rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr);
- break;
- case IB_WR_LOCAL_INV:
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1);
- rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
- break;
- case IB_WR_REG_MR:
- set_frmr_seg(rc_sq_wqe, reg_wr(wr));
- break;
- case IB_WR_ATOMIC_CMP_AND_SWP:
- case IB_WR_ATOMIC_FETCH_AND_ADD:
- rc_sq_wqe->rkey = cpu_to_le32(atomic_wr(wr)->rkey);
- rc_sq_wqe->va = cpu_to_le64(atomic_wr(wr)->remote_addr);
- break;
- default:
- break;
- }
-
- roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- to_hr_opcode(wr->opcode));
-
if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
set_atomic_seg(wr, rc_sq_wqe, valid_num_sge);
@@ -1682,7 +1806,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
caps->max_sq_desc_sz = HNS_ROCE_V2_MAX_SQ_DESC_SZ;
caps->max_rq_desc_sz = HNS_ROCE_V2_MAX_RQ_DESC_SZ;
caps->max_srq_desc_sz = HNS_ROCE_V2_MAX_SRQ_DESC_SZ;
- caps->qpc_entry_sz = HNS_ROCE_V2_QPC_ENTRY_SZ;
+ caps->qpc_sz = HNS_ROCE_V2_QPC_SZ;
caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ;
caps->trrl_entry_sz = HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ;
caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ;
@@ -1690,7 +1814,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
caps->mtpt_entry_sz = HNS_ROCE_V2_MTPT_ENTRY_SZ;
caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ;
caps->idx_entry_sz = HNS_ROCE_V2_IDX_ENTRY_SZ;
- caps->cq_entry_sz = HNS_ROCE_V2_CQE_ENTRY_SIZE;
+ caps->cqe_sz = HNS_ROCE_V2_CQE_SIZE;
caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED;
caps->reserved_lkey = 0;
caps->reserved_pds = 0;
@@ -1739,6 +1863,8 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM;
caps->aeqe_depth = HNS_ROCE_V2_ASYNC_EQE_NUM;
+ caps->aeqe_size = HNS_ROCE_AEQE_SIZE;
+ caps->ceqe_size = HNS_ROCE_CEQE_SIZE;
caps->local_ca_ack_delay = 0;
caps->max_mtu = IB_MTU_4096;
@@ -1760,19 +1886,26 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
caps->cqc_timer_buf_pg_sz = 0;
caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
- caps->sccc_entry_sz = HNS_ROCE_V2_SCCC_ENTRY_SZ;
+ caps->sccc_sz = HNS_ROCE_V2_SCCC_SZ;
caps->sccc_ba_pg_sz = 0;
caps->sccc_buf_pg_sz = 0;
caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM;
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+ caps->aeqe_size = HNS_ROCE_V3_EQE_SIZE;
+ caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE;
+ caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE;
+ caps->qpc_sz = HNS_ROCE_V3_QPC_SZ;
+ }
}
static void calc_pg_sz(int obj_num, int obj_size, int hop_num, int ctx_bt_num,
int *buf_page_size, int *bt_page_size, u32 hem_type)
{
u64 obj_per_chunk;
- int bt_chunk_size = 1 << PAGE_SHIFT;
- int buf_chunk_size = 1 << PAGE_SHIFT;
- int obj_per_chunk_default = buf_chunk_size / obj_size;
+ u64 bt_chunk_size = PAGE_SIZE;
+ u64 buf_chunk_size = PAGE_SIZE;
+ u64 obj_per_chunk_default = buf_chunk_size / obj_size;
*buf_page_size = 0;
*bt_page_size = 0;
@@ -1855,7 +1988,7 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
caps->max_sq_desc_sz = resp_a->max_sq_desc_sz;
caps->max_rq_desc_sz = resp_a->max_rq_desc_sz;
caps->max_srq_desc_sz = resp_a->max_srq_desc_sz;
- caps->cq_entry_sz = resp_a->cq_entry_sz;
+ caps->cqe_sz = HNS_ROCE_V2_CQE_SIZE;
caps->mtpt_entry_sz = resp_b->mtpt_entry_sz;
caps->irrl_entry_sz = resp_b->irrl_entry_sz;
@@ -1863,9 +1996,9 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
caps->cqc_entry_sz = resp_b->cqc_entry_sz;
caps->srqc_entry_sz = resp_b->srqc_entry_sz;
caps->idx_entry_sz = resp_b->idx_entry_sz;
- caps->sccc_entry_sz = resp_b->scc_ctx_entry_sz;
+ caps->sccc_sz = resp_b->sccc_sz;
caps->max_mtu = resp_b->max_mtu;
- caps->qpc_entry_sz = le16_to_cpu(resp_b->qpc_entry_sz);
+ caps->qpc_sz = HNS_ROCE_V2_QPC_SZ;
caps->min_cqes = resp_b->min_cqes;
caps->min_wqes = resp_b->min_wqes;
caps->page_size_cap = le32_to_cpu(resp_b->page_size_cap);
@@ -1958,6 +2091,8 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ;
caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ;
caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS;
+ caps->ceqe_size = HNS_ROCE_CEQE_SIZE;
+ caps->aeqe_size = HNS_ROCE_AEQE_SIZE;
caps->mtt_ba_pg_sz = 0;
caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS;
caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS;
@@ -1981,7 +2116,15 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M,
V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S);
- calc_pg_sz(caps->num_qps, caps->qpc_entry_sz, caps->qpc_hop_num,
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+ caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE;
+ caps->aeqe_size = HNS_ROCE_V3_EQE_SIZE;
+ caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE;
+ caps->qpc_sz = HNS_ROCE_V3_QPC_SZ;
+ caps->sccc_sz = HNS_ROCE_V3_SCCC_SZ;
+ }
+
+ calc_pg_sz(caps->num_qps, caps->qpc_sz, caps->qpc_hop_num,
caps->qpc_bt_num, &caps->qpc_buf_pg_sz, &caps->qpc_ba_pg_sz,
HEM_TYPE_QPC);
calc_pg_sz(caps->num_mtpts, caps->mtpt_entry_sz, caps->mpt_hop_num,
@@ -1998,7 +2141,7 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
- calc_pg_sz(caps->num_qps, caps->sccc_entry_sz,
+ calc_pg_sz(caps->num_qps, caps->sccc_sz,
caps->sccc_hop_num, caps->sccc_bt_num,
&caps->sccc_buf_pg_sz, &caps->sccc_ba_pg_sz,
HEM_TYPE_SCCC);
@@ -2018,6 +2161,56 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
return 0;
}
+static int hns_roce_config_qpc_size(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cfg_entry_size *cfg_size =
+ (struct hns_roce_cfg_entry_size *)desc.data;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_ENTRY_SIZE,
+ false);
+
+ cfg_size->type = cpu_to_le32(HNS_ROCE_CFG_QPC_SIZE);
+ cfg_size->size = cpu_to_le32(hr_dev->caps.qpc_sz);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static int hns_roce_config_sccc_size(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cfg_entry_size *cfg_size =
+ (struct hns_roce_cfg_entry_size *)desc.data;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_ENTRY_SIZE,
+ false);
+
+ cfg_size->type = cpu_to_le32(HNS_ROCE_CFG_SCCC_SIZE);
+ cfg_size->size = cpu_to_le32(hr_dev->caps.sccc_sz);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static int hns_roce_config_entry_size(struct hns_roce_dev *hr_dev)
+{
+ int ret;
+
+ if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09)
+ return 0;
+
+ ret = hns_roce_config_qpc_size(hr_dev);
+ if (ret) {
+ dev_err(hr_dev->dev, "failed to cfg qpc sz, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = hns_roce_config_sccc_size(hr_dev);
+ if (ret)
+ dev_err(hr_dev->dev, "failed to cfg sccc sz, ret = %d.\n", ret);
+
+ return ret;
+}
+
static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
{
struct hns_roce_caps *caps = &hr_dev->caps;
@@ -2090,9 +2283,14 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
}
ret = hns_roce_v2_set_bt(hr_dev);
- if (ret)
- dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n",
- ret);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "Configure bt attribute fail, ret = %d.\n", ret);
+ return ret;
+ }
+
+ /* Configure the size of QPC, SCCC, etc. */
+ ret = hns_roce_config_entry_size(hr_dev);
return ret;
}
@@ -2757,8 +2955,7 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n)
{
- return hns_roce_buf_offset(hr_cq->mtr.kmem,
- n * HNS_ROCE_V2_CQE_ENTRY_SIZE);
+ return hns_roce_buf_offset(hr_cq->mtr.kmem, n * hr_cq->cqe_size);
}
static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n)
@@ -2858,6 +3055,10 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQN_M,
V2_CQC_BYTE_8_CQN_S, hr_cq->cqn);
+ roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQE_SIZE_M,
+ V2_CQC_BYTE_8_CQE_SIZE_S, hr_cq->cqe_size ==
+ HNS_ROCE_V3_CQE_SIZE ? 1 : 0);
+
cq_context->cqe_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0]));
roce_set_field(cq_context->byte_16_hop_addr,
@@ -3025,7 +3226,8 @@ out:
}
static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
- struct hns_roce_v2_cqe *cqe, struct ib_wc *wc)
+ struct hns_roce_cq *cq, struct hns_roce_v2_cqe *cqe,
+ struct ib_wc *wc)
{
static const struct {
u32 cqe_status;
@@ -3066,7 +3268,7 @@ static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
ibdev_err(&hr_dev->ib_dev, "error cqe status 0x%x:\n", cqe_status);
print_hex_dump(KERN_ERR, "", DUMP_PREFIX_NONE, 16, 4, cqe,
- sizeof(*cqe), false);
+ cq->cqe_size, false);
/*
* For hns ROCEE, GENERAL_ERR is an error type that is not defined in
@@ -3163,7 +3365,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
++wq->tail;
}
- get_cqe_status(hr_dev, *cur_qp, cqe, wc);
+ get_cqe_status(hr_dev, *cur_qp, hr_cq, cqe, wc);
if (unlikely(wc->status != IB_WC_SUCCESS))
return 0;
@@ -3514,16 +3716,21 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev,
static int hns_roce_v2_qp_modify(struct hns_roce_dev *hr_dev,
struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask,
struct hns_roce_qp *hr_qp)
{
struct hns_roce_cmd_mailbox *mailbox;
+ int qpc_size;
int ret;
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
- memcpy(mailbox->buf, context, sizeof(*context) * 2);
+ /* The qpc size of HIP08 is only 256B, which is half of HIP09 */
+ qpc_size = hr_dev->caps.qpc_sz;
+ memcpy(mailbox->buf, context, qpc_size);
+ memcpy(mailbox->buf + qpc_size, qpc_mask, qpc_size);
ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_qp->qpn, 0,
HNS_ROCE_CMD_MODIFY_QPC,
@@ -3641,9 +3848,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
V2_QPC_BYTE_76_SRQ_EN_S, 1);
}
- roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_ACK_REQ_FREQ_M,
- V2_QPC_BYTE_172_ACK_REQ_FREQ_S, 4);
-
roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1);
hr_qp->access_flags = attr->qp_access_flags;
@@ -3954,6 +4158,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
dma_addr_t trrl_ba;
dma_addr_t irrl_ba;
enum ib_mtu mtu;
+ u8 lp_pktn_ini;
u8 port_num;
u64 *mtts;
u8 *dmac;
@@ -4052,6 +4257,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
V2_QPC_BYTE_52_DMAC_S, 0);
mtu = get_mtu(ibqp, attr);
+ hr_qp->path_mtu = mtu;
if (attr_mask & IB_QP_PATH_MTU) {
roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
@@ -4061,13 +4267,21 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
}
#define MAX_LP_MSG_LEN 65536
- /* MTU*(2^LP_PKTN_INI) shouldn't be bigger than 64kb */
+ /* MTU * (2 ^ LP_PKTN_INI) shouldn't be bigger than 64KB */
+ lp_pktn_ini = ilog2(MAX_LP_MSG_LEN / ib_mtu_enum_to_int(mtu));
+
roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M,
- V2_QPC_BYTE_56_LP_PKTN_INI_S,
- ilog2(MAX_LP_MSG_LEN / ib_mtu_enum_to_int(mtu)));
+ V2_QPC_BYTE_56_LP_PKTN_INI_S, lp_pktn_ini);
roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M,
V2_QPC_BYTE_56_LP_PKTN_INI_S, 0);
+ /* ACK_REQ_FREQ should be larger than or equal to LP_PKTN_INI */
+ roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_ACK_REQ_FREQ_M,
+ V2_QPC_BYTE_172_ACK_REQ_FREQ_S, lp_pktn_ini);
+ roce_set_field(qpc_mask->byte_172_sq_psn,
+ V2_QPC_BYTE_172_ACK_REQ_FREQ_M,
+ V2_QPC_BYTE_172_ACK_REQ_FREQ_S, 0);
+
roce_set_bit(qpc_mask->byte_108_rx_reqepsn,
V2_QPC_BYTE_108_RX_REQ_PSN_ERR_S, 0);
roce_set_field(qpc_mask->byte_96_rx_reqmsn, V2_QPC_BYTE_96_RX_REQ_MSN_M,
@@ -4164,6 +4378,14 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
return 0;
}
+static inline u16 get_udp_sport(u32 fl, u32 lqpn, u32 rqpn)
+{
+ if (!fl)
+ fl = rdma_calc_flow_label(lqpn, rqpn);
+
+ return rdma_flow_label_to_udp_sport(fl);
+}
+
static int hns_roce_v2_set_path(struct ib_qp *ibqp,
const struct ib_qp_attr *attr,
int attr_mask,
@@ -4227,7 +4449,8 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_UDPSPN_M,
V2_QPC_BYTE_52_UDPSPN_S,
- is_udp ? 0x12b7 : 0);
+ is_udp ? get_udp_sport(grh->flow_label, ibqp->qp_num,
+ attr->dest_qp_num) : 0);
roce_set_field(qpc_mask->byte_52_udpspn_dmac, V2_QPC_BYTE_52_UDPSPN_M,
V2_QPC_BYTE_52_UDPSPN_S, 0);
@@ -4259,11 +4482,19 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
V2_QPC_BYTE_28_FL_S, 0);
memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw));
+
+ hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
+ if (unlikely(hr_qp->sl > MAX_SERVICE_LEVEL)) {
+ ibdev_err(ibdev,
+ "failed to fill QPC, sl (%d) shouldn't be larger than %d.\n",
+ hr_qp->sl, MAX_SERVICE_LEVEL);
+ return -EINVAL;
+ }
+
roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
- V2_QPC_BYTE_28_SL_S, rdma_ah_get_sl(&attr->ah_attr));
+ V2_QPC_BYTE_28_SL_S, hr_qp->sl);
roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
V2_QPC_BYTE_28_SL_S, 0);
- hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
return 0;
}
@@ -4309,7 +4540,7 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
}
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
- memset(qpc_mask, 0, sizeof(*qpc_mask));
+ memset(qpc_mask, 0, hr_dev->caps.qpc_sz);
modify_qp_reset_to_init(ibqp, attr, attr_mask, context,
qpc_mask);
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
@@ -4532,8 +4763,9 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
* we should set all bits of the relevant fields in context mask to
* 0 at the same time, else set them to 0x1.
*/
- memset(context, 0, sizeof(*context));
- memset(qpc_mask, 0xff, sizeof(*qpc_mask));
+ memset(context, 0, hr_dev->caps.qpc_sz);
+ memset(qpc_mask, 0xff, hr_dev->caps.qpc_sz);
+
ret = hns_roce_v2_set_abs_fields(ibqp, attr, attr_mask, cur_state,
new_state, context, qpc_mask);
if (ret)
@@ -4583,7 +4815,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
V2_QPC_BYTE_60_QP_ST_S, 0);
/* SW pass context to HW */
- ret = hns_roce_v2_qp_modify(hr_dev, ctx, hr_qp);
+ ret = hns_roce_v2_qp_modify(hr_dev, context, qpc_mask, hr_qp);
if (ret) {
ibdev_err(ibdev, "failed to modify QP, ret = %d\n", ret);
goto out;
@@ -4646,7 +4878,7 @@ static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev,
if (ret)
goto out;
- memcpy(hr_context, mailbox->buf, sizeof(*hr_context));
+ memcpy(hr_context, mailbox->buf, hr_dev->caps.qpc_sz);
out:
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
@@ -4759,7 +4991,9 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
qp_attr->retry_cnt = roce_get_field(context.byte_212_lsn,
V2_QPC_BYTE_212_RETRY_CNT_M,
V2_QPC_BYTE_212_RETRY_CNT_S);
- qp_attr->rnr_retry = le32_to_cpu(context.rq_rnr_timer);
+ qp_attr->rnr_retry = roce_get_field(context.byte_244_rnr_rxack,
+ V2_QPC_BYTE_244_RNR_CNT_M,
+ V2_QPC_BYTE_244_RNR_CNT_S);
done:
qp_attr->cur_qp_state = qp_attr->qp_state;
@@ -4775,6 +5009,7 @@ done:
}
qp_init_attr->cap = qp_attr->cap;
+ qp_init_attr->sq_sig_type = hr_qp->sq_signal_bits;
out:
mutex_unlock(&hr_qp->mutex);
@@ -5004,6 +5239,10 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
struct hns_roce_cmd_mailbox *mailbox;
int ret;
+ /* Resizing SRQs is not supported yet */
+ if (srq_attr_mask & IB_SRQ_MAX_WR)
+ return -EINVAL;
+
if (srq_attr_mask & IB_SRQ_LIMIT) {
if (srq_attr->srq_limit >= srq->wqe_cnt)
return -EINVAL;
@@ -5233,7 +5472,7 @@ static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
aeqe = hns_roce_buf_offset(eq->mtr.kmem,
(eq->cons_index & (eq->entries - 1)) *
- HNS_ROCE_AEQ_ENTRY_SIZE);
+ eq->eqe_size);
return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^
!!(eq->cons_index & eq->entries)) ? aeqe : NULL;
@@ -5333,7 +5572,8 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
ceqe = hns_roce_buf_offset(eq->mtr.kmem,
(eq->cons_index & (eq->entries - 1)) *
- HNS_ROCE_CEQ_ENTRY_SIZE);
+ eq->eqe_size);
+
return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^
(!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
}
@@ -5374,7 +5614,7 @@ static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr)
{
struct hns_roce_eq *eq = eq_ptr;
struct hns_roce_dev *hr_dev = eq->hr_dev;
- int int_work = 0;
+ int int_work;
if (eq->type_flag == HNS_ROCE_CEQ)
/* Completion event interrupt */
@@ -5609,14 +5849,16 @@ static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
roce_set_field(eqc->byte_36, HNS_ROCE_EQC_CONS_INDX_M,
HNS_ROCE_EQC_CONS_INDX_S, HNS_ROCE_EQ_INIT_CONS_IDX);
- /* set nex_eqe_ba[43:12] */
- roce_set_field(eqc->nxt_eqe_ba0, HNS_ROCE_EQC_NXT_EQE_BA_L_M,
+ roce_set_field(eqc->byte_40, HNS_ROCE_EQC_NXT_EQE_BA_L_M,
HNS_ROCE_EQC_NXT_EQE_BA_L_S, eqe_ba[1] >> 12);
- /* set nex_eqe_ba[63:44] */
- roce_set_field(eqc->nxt_eqe_ba1, HNS_ROCE_EQC_NXT_EQE_BA_H_M,
+ roce_set_field(eqc->byte_44, HNS_ROCE_EQC_NXT_EQE_BA_H_M,
HNS_ROCE_EQC_NXT_EQE_BA_H_S, eqe_ba[1] >> 44);
+ roce_set_field(eqc->byte_44, HNS_ROCE_EQC_EQE_SIZE_M,
+ HNS_ROCE_EQC_EQE_SIZE_S,
+ eq->eqe_size == HNS_ROCE_V3_EQE_SIZE ? 1 : 0);
+
return 0;
}
@@ -5807,7 +6049,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
eq_cmd = HNS_ROCE_CMD_CREATE_CEQC;
eq->type_flag = HNS_ROCE_CEQ;
eq->entries = hr_dev->caps.ceqe_depth;
- eq->eqe_size = HNS_ROCE_CEQ_ENTRY_SIZE;
+ eq->eqe_size = hr_dev->caps.ceqe_size;
eq->irq = hr_dev->irq[i + other_num + aeq_num];
eq->eq_max_cnt = HNS_ROCE_CEQ_DEFAULT_BURST_NUM;
eq->eq_period = HNS_ROCE_CEQ_DEFAULT_INTERVAL;
@@ -5816,7 +6058,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
eq_cmd = HNS_ROCE_CMD_CREATE_AEQC;
eq->type_flag = HNS_ROCE_AEQ;
eq->entries = hr_dev->caps.aeqe_depth;
- eq->eqe_size = HNS_ROCE_AEQ_ENTRY_SIZE;
+ eq->eqe_size = hr_dev->caps.aeqe_size;
eq->irq = hr_dev->irq[i - comp_num + other_num];
eq->eq_max_cnt = HNS_ROCE_AEQ_DEFAULT_BURST_NUM;
eq->eq_period = HNS_ROCE_AEQ_DEFAULT_INTERVAL;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index ac29be43b6bd..29c9dd4bcbc6 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -60,6 +60,7 @@
#define HNS_ROCE_V2_MAX_SQ_SGE_NUM 64
#define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000
#define HNS_ROCE_V2_MAX_SQ_INLINE 0x20
+#define HNS_ROCE_V2_MAX_RC_INL_INN_SZ 32
#define HNS_ROCE_V2_UAR_NUM 256
#define HNS_ROCE_V2_PHY_UAR_NUM 1
#define HNS_ROCE_V2_MAX_IRQ_NUM 65
@@ -77,7 +78,6 @@
#define HNS_ROCE_V2_MAX_SQ_DESC_SZ 64
#define HNS_ROCE_V2_MAX_RQ_DESC_SZ 16
#define HNS_ROCE_V2_MAX_SRQ_DESC_SZ 64
-#define HNS_ROCE_V2_QPC_ENTRY_SZ 256
#define HNS_ROCE_V2_IRRL_ENTRY_SZ 64
#define HNS_ROCE_V2_TRRL_ENTRY_SZ 48
#define HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ 100
@@ -86,8 +86,10 @@
#define HNS_ROCE_V2_MTPT_ENTRY_SZ 64
#define HNS_ROCE_V2_MTT_ENTRY_SZ 64
#define HNS_ROCE_V2_IDX_ENTRY_SZ 4
-#define HNS_ROCE_V2_CQE_ENTRY_SIZE 32
-#define HNS_ROCE_V2_SCCC_ENTRY_SZ 32
+
+#define HNS_ROCE_V2_SCCC_SZ 32
+#define HNS_ROCE_V3_SCCC_SZ 64
+
#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE
#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE
#define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000
@@ -229,6 +231,7 @@ enum hns_roce_opcode_type {
HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404,
HNS_ROCE_OPC_QUERY_PF_TIMER_RES = 0x8406,
HNS_ROCE_OPC_QUERY_PF_CAPS_NUM = 0x8408,
+ HNS_ROCE_OPC_CFG_ENTRY_SIZE = 0x8409,
HNS_ROCE_OPC_CFG_SGID_TB = 0x8500,
HNS_ROCE_OPC_CFG_SMAC_TB = 0x8501,
HNS_ROCE_OPC_POST_MB = 0x8504,
@@ -309,6 +312,9 @@ struct hns_roce_v2_cq_context {
#define V2_CQC_BYTE_8_CQN_S 0
#define V2_CQC_BYTE_8_CQN_M GENMASK(23, 0)
+#define V2_CQC_BYTE_8_CQE_SIZE_S 27
+#define V2_CQC_BYTE_8_CQE_SIZE_M GENMASK(28, 27)
+
#define V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_S 0
#define V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_M GENMASK(19, 0)
@@ -512,6 +518,7 @@ struct hns_roce_v2_qp_context {
__le32 byte_248_ack_psn;
__le32 byte_252_err_txcqn;
__le32 byte_256_sqflush_rqcqe;
+ __le32 ext[64];
};
#define V2_QPC_BYTE_4_TST_S 0
@@ -896,6 +903,7 @@ struct hns_roce_v2_cqe {
u8 smac[4];
__le32 byte_28;
__le32 byte_32;
+ __le32 rsv[8];
};
#define V2_CQE_BYTE_4_OPCODE_S 0
@@ -1187,6 +1195,8 @@ struct hns_roce_v2_rc_send_wqe {
#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0)
+#define V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S 31
+
struct hns_roce_wqe_frmr_seg {
__le32 pbl_size;
__le32 mode_buf_pg_sz;
@@ -1537,6 +1547,18 @@ struct hns_roce_cfg_sgid_tb {
__le32 vf_sgid_h;
__le32 vf_sgid_type_rsv;
};
+
+enum {
+ HNS_ROCE_CFG_QPC_SIZE = BIT(0),
+ HNS_ROCE_CFG_SCCC_SIZE = BIT(1),
+};
+
+struct hns_roce_cfg_entry_size {
+ __le32 type;
+ __le32 rsv[4];
+ __le32 size;
+};
+
#define CFG_SGID_TB_TABLE_IDX_S 0
#define CFG_SGID_TB_TABLE_IDX_M GENMASK(7, 0)
@@ -1571,7 +1593,7 @@ struct hns_roce_query_pf_caps_a {
u8 max_sq_desc_sz;
u8 max_rq_desc_sz;
u8 max_srq_desc_sz;
- u8 cq_entry_sz;
+ u8 cqe_sz;
};
struct hns_roce_query_pf_caps_b {
@@ -1581,9 +1603,9 @@ struct hns_roce_query_pf_caps_b {
u8 cqc_entry_sz;
u8 srqc_entry_sz;
u8 idx_entry_sz;
- u8 scc_ctx_entry_sz;
+ u8 sccc_sz;
u8 max_mtu;
- __le16 qpc_entry_sz;
+ __le16 qpc_sz;
__le16 qpc_timer_entry_sz;
__le16 cqc_timer_entry_sz;
u8 min_cqes;
@@ -1777,8 +1799,8 @@ struct hns_roce_eq_context {
__le32 byte_28;
__le32 byte_32;
__le32 byte_36;
- __le32 nxt_eqe_ba0;
- __le32 nxt_eqe_ba1;
+ __le32 byte_40;
+ __le32 byte_44;
__le32 rsv[5];
};
@@ -1920,6 +1942,9 @@ struct hns_roce_eq_context {
#define HNS_ROCE_EQC_NXT_EQE_BA_H_S 0
#define HNS_ROCE_EQC_NXT_EQE_BA_H_M GENMASK(19, 0)
+#define HNS_ROCE_EQC_EQE_SIZE_S 20
+#define HNS_ROCE_EQC_EQE_SIZE_M GENMASK(21, 20)
+
#define HNS_ROCE_V2_CEQE_COMP_CQN_S 0
#define HNS_ROCE_V2_CEQE_COMP_CQN_M GENMASK(23, 0)
@@ -1941,6 +1966,8 @@ struct hns_roce_eq_context {
#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0
#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0)
+#define MAX_SERVICE_LEVEL 0x7
+
struct hns_roce_wqe_atomic_seg {
__le64 fetchadd_swap_data;
__le64 cmp_data;
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 5907cfd878a6..afeffafc59f9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -141,8 +141,8 @@ static int hns_roce_netdev_event(struct notifier_block *self,
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct hns_roce_ib_iboe *iboe = NULL;
struct hns_roce_dev *hr_dev = NULL;
- u8 port = 0;
- int ret = 0;
+ int ret;
+ u8 port;
hr_dev = container_of(self, struct hns_roce_dev, iboe.nb);
iboe = &hr_dev->iboe;
@@ -323,6 +323,8 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
mutex_init(&context->page_mutex);
}
+ resp.cqe_size = hr_dev->caps.cqe_sz;
+
ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
if (ret)
goto error_fail_copy_to_udata;
@@ -454,6 +456,8 @@ static const struct ib_device_ops hns_roce_dev_mr_ops = {
static const struct ib_device_ops hns_roce_dev_mw_ops = {
.alloc_mw = hns_roce_alloc_mw,
.dealloc_mw = hns_roce_dealloc_mw,
+
+ INIT_RDMA_OBJ_SIZE(ib_mw, hns_roce_mw, ibmw),
};
static const struct ib_device_ops hns_roce_dev_frmr_ops = {
@@ -545,7 +549,8 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
if (ret)
return ret;
}
- ret = ib_register_device(ib_dev, "hns_%d");
+ dma_set_max_seg_size(dev, UINT_MAX);
+ ret = ib_register_device(ib_dev, "hns_%d", dev);
if (ret) {
dev_err(dev, "ib_register_device failed!\n");
return ret;
@@ -587,7 +592,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
}
ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.qp_table,
- HEM_TYPE_QPC, hr_dev->caps.qpc_entry_sz,
+ HEM_TYPE_QPC, hr_dev->caps.qpc_sz,
hr_dev->caps.num_qps, 1);
if (ret) {
dev_err(dev, "Failed to init QP context memory, aborting.\n");
@@ -638,11 +643,11 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
}
}
- if (hr_dev->caps.sccc_entry_sz) {
+ if (hr_dev->caps.sccc_sz) {
ret = hns_roce_init_hem_table(hr_dev,
&hr_dev->qp_table.sccc_table,
HEM_TYPE_SCCC,
- hr_dev->caps.sccc_entry_sz,
+ hr_dev->caps.sccc_sz,
hr_dev->caps.num_qps, 1);
if (ret) {
dev_err(dev,
@@ -682,7 +687,7 @@ err_unmap_qpc_timer:
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qpc_timer_table);
err_unmap_ctx:
- if (hr_dev->caps.sccc_entry_sz)
+ if (hr_dev->caps.sccc_sz)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->qp_table.sccc_table);
err_unmap_srq:
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index e5df3884b41d..7f81a695e9af 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -589,28 +589,22 @@ err_table:
return ret;
}
-struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
- struct ib_udata *udata)
+int hns_roce_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ib_pd->device);
- struct hns_roce_mw *mw;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
+ struct hns_roce_mw *mw = to_hr_mw(ibmw);
unsigned long index = 0;
int ret;
- mw = kmalloc(sizeof(*mw), GFP_KERNEL);
- if (!mw)
- return ERR_PTR(-ENOMEM);
-
/* Allocate a key for mw from bitmap */
ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
if (ret)
- goto err_bitmap;
+ return ret;
mw->rkey = hw_index_to_key(index);
- mw->ibmw.rkey = mw->rkey;
- mw->ibmw.type = type;
- mw->pdn = to_hr_pd(ib_pd)->pdn;
+ ibmw->rkey = mw->rkey;
+ mw->pdn = to_hr_pd(ibmw->pd)->pdn;
mw->pbl_hop_num = hr_dev->caps.pbl_hop_num;
mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
@@ -619,15 +613,11 @@ struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
if (ret)
goto err_mw;
- return &mw->ibmw;
+ return 0;
err_mw:
hns_roce_mw_free(hr_dev, mw);
-
-err_bitmap:
- kfree(mw);
-
- return ERR_PTR(ret);
+ return ret;
}
int hns_roce_dealloc_mw(struct ib_mw *ibmw)
@@ -636,8 +626,6 @@ int hns_roce_dealloc_mw(struct ib_mw *ibmw)
struct hns_roce_mw *mw = to_hr_mw(ibmw);
hns_roce_mw_free(hr_dev, mw);
- kfree(mw);
-
return 0;
}
@@ -707,19 +695,6 @@ static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr)
return size;
}
-static inline int mtr_umem_page_count(struct ib_umem *umem,
- unsigned int page_shift)
-{
- int count = ib_umem_page_count(umem);
-
- if (page_shift >= PAGE_SHIFT)
- count >>= page_shift - PAGE_SHIFT;
- else
- count <<= PAGE_SHIFT - page_shift;
-
- return count;
-}
-
static inline size_t mtr_kmem_direct_size(bool is_direct, size_t alloc_size,
unsigned int page_shift)
{
@@ -767,13 +742,11 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
struct ib_udata *udata, unsigned long user_addr)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
- unsigned int max_pg_shift = buf_attr->page_shift;
- unsigned int best_pg_shift = 0;
+ unsigned int best_pg_shift;
int all_pg_count = 0;
size_t direct_size;
size_t total_size;
- unsigned long tmp;
- int ret = 0;
+ int ret;
total_size = mtr_bufs_size(buf_attr);
if (total_size < 1) {
@@ -782,6 +755,9 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
}
if (udata) {
+ unsigned long pgsz_bitmap;
+ unsigned long page_size;
+
mtr->kmem = NULL;
mtr->umem = ib_umem_get(ibdev, user_addr, total_size,
buf_attr->user_access);
@@ -790,15 +766,17 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
PTR_ERR(mtr->umem));
return -ENOMEM;
}
- if (buf_attr->fixed_page) {
- best_pg_shift = max_pg_shift;
- } else {
- tmp = GENMASK(max_pg_shift, 0);
- ret = ib_umem_find_best_pgsz(mtr->umem, tmp, user_addr);
- best_pg_shift = (ret <= PAGE_SIZE) ?
- PAGE_SHIFT : ilog2(ret);
- }
- all_pg_count = mtr_umem_page_count(mtr->umem, best_pg_shift);
+ if (buf_attr->fixed_page)
+ pgsz_bitmap = 1 << buf_attr->page_shift;
+ else
+ pgsz_bitmap = GENMASK(buf_attr->page_shift, PAGE_SHIFT);
+
+ page_size = ib_umem_find_best_pgsz(mtr->umem, pgsz_bitmap,
+ user_addr);
+ if (!page_size)
+ return -EINVAL;
+ best_pg_shift = order_base_2(page_size);
+ all_pg_count = ib_umem_num_dma_blocks(mtr->umem, page_size);
ret = 0;
} else {
mtr->umem = NULL;
@@ -808,16 +786,15 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
return -ENOMEM;
}
direct_size = mtr_kmem_direct_size(is_direct, total_size,
- max_pg_shift);
+ buf_attr->page_shift);
ret = hns_roce_buf_alloc(hr_dev, total_size, direct_size,
- mtr->kmem, max_pg_shift);
+ mtr->kmem, buf_attr->page_shift);
if (ret) {
ibdev_err(ibdev, "Failed to alloc kmem, ret %d\n", ret);
goto err_alloc_mem;
- } else {
- best_pg_shift = max_pg_shift;
- all_pg_count = mtr->kmem->npages;
}
+ best_pg_shift = buf_attr->page_shift;
+ all_pg_count = mtr->kmem->npages;
}
/* must bigger than minimum hardware page shift */
@@ -967,7 +944,7 @@ static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev,
unsigned int *buf_page_shift)
{
struct hns_roce_buf_region *r;
- unsigned int page_shift = 0;
+ unsigned int page_shift;
int page_cnt = 0;
size_t buf_size;
int region_cnt;
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index b10c50b8736e..98f69496adb4 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -82,9 +82,10 @@ int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
return 0;
}
-void hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
hns_roce_pd_free(to_hr_dev(pd->device), to_hr_pd(pd)->pdn);
+ return 0;
}
int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index c063c450c715..6c081dd985fc 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -41,8 +41,6 @@
#include "hns_roce_hem.h"
#include <rdma/hns-abi.h>
-#define SQP_NUM (2 * HNS_ROCE_MAX_PORTS)
-
static void flush_work_handle(struct work_struct *work)
{
struct hns_roce_work *flush_work = container_of(work,
@@ -288,7 +286,7 @@ static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
}
}
- if (hr_dev->caps.sccc_entry_sz) {
+ if (hr_dev->caps.sccc_sz) {
/* Alloc memory for SCC CTX */
ret = hns_roce_table_get(hr_dev, &qp_table->sccc_table,
hr_qp->qpn);
@@ -551,10 +549,9 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev,
int ret;
if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes ||
- cap->max_send_sge > hr_dev->caps.max_sq_sg ||
- cap->max_inline_data > hr_dev->caps.max_sq_inline) {
+ cap->max_send_sge > hr_dev->caps.max_sq_sg) {
ibdev_err(ibdev,
- "failed to check SQ WR, SGE or inline num, ret = %d.\n",
+ "failed to check SQ WR or SGE num, ret = %d.\n",
-EINVAL);
return -EINVAL;
}
@@ -577,9 +574,6 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev,
cap->max_send_wr = cnt;
cap->max_send_sge = hr_qp->sq.max_gs;
- /* We don't support inline sends for kernel QPs (yet) */
- cap->max_inline_data = 0;
-
return 0;
}
@@ -847,6 +841,11 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
hr_qp->ibqp.qp_type = init_attr->qp_type;
+ if (init_attr->cap.max_inline_data > hr_dev->caps.max_sq_inline)
+ init_attr->cap.max_inline_data = hr_dev->caps.max_sq_inline;
+
+ hr_qp->max_inline_data = init_attr->cap.max_inline_data;
+
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR;
else
@@ -1014,53 +1013,32 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
int ret;
switch (init_attr->qp_type) {
- case IB_QPT_RC: {
- hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
- if (!hr_qp)
- return ERR_PTR(-ENOMEM);
-
- ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata,
- hr_qp);
- if (ret) {
- ibdev_err(ibdev, "Create QP 0x%06lx failed(%d)\n",
- hr_qp->qpn, ret);
- kfree(hr_qp);
- return ERR_PTR(ret);
- }
-
+ case IB_QPT_RC:
+ case IB_QPT_GSI:
break;
+ default:
+ ibdev_err(ibdev, "not support QP type %d\n",
+ init_attr->qp_type);
+ return ERR_PTR(-EOPNOTSUPP);
}
- case IB_QPT_GSI: {
- /* Userspace is not allowed to create special QPs: */
- if (udata) {
- ibdev_err(ibdev, "not support usr space GSI\n");
- return ERR_PTR(-EINVAL);
- }
- hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
- if (!hr_qp)
- return ERR_PTR(-ENOMEM);
+ hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
+ if (!hr_qp)
+ return ERR_PTR(-ENOMEM);
+ if (init_attr->qp_type == IB_QPT_GSI) {
hr_qp->port = init_attr->port_num - 1;
hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
-
- ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata,
- hr_qp);
- if (ret) {
- ibdev_err(ibdev, "Create GSI QP failed!\n");
- kfree(hr_qp);
- return ERR_PTR(ret);
- }
-
- break;
- }
- default:{
- ibdev_err(ibdev, "not support QP type %d\n",
- init_attr->qp_type);
- return ERR_PTR(-EOPNOTSUPP);
- }
}
+ ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, hr_qp);
+ if (ret) {
+ ibdev_err(ibdev, "Create QP type 0x%x failed(%d)\n",
+ init_attr->qp_type, ret);
+ ibdev_err(ibdev, "Create GSI QP failed!\n");
+ kfree(hr_qp);
+ return ERR_PTR(ret);
+ }
return &hr_qp->ibqp;
}
@@ -1161,8 +1139,10 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
mutex_lock(&hr_qp->mutex);
- cur_state = attr_mask & IB_QP_CUR_STATE ?
- attr->cur_qp_state : (enum ib_qp_state)hr_qp->state;
+ if (attr_mask & IB_QP_CUR_STATE && attr->cur_qp_state != hr_qp->state)
+ goto out;
+
+ cur_state = hr_qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (ibqp->uobject &&
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index b9e2dbd372b6..8caf74e44efd 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -285,7 +285,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
struct hns_roce_srq *srq = to_hr_srq(ib_srq);
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_ib_create_srq ucmd = {};
- int ret = 0;
+ int ret;
u32 cqn;
/* Check the actual SRQ wqe and SRQ sge num */
@@ -363,7 +363,7 @@ err_buf_alloc:
return ret;
}
-void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
struct hns_roce_srq *srq = to_hr_srq(ibsrq);
@@ -372,6 +372,7 @@ void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
free_srq_idx(hr_dev, srq);
free_srq_wrid(srq);
free_srq_buf(hr_dev, srq);
+ return 0;
}
int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev)
diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index 25747b85a79c..832b80de004f 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -409,8 +409,8 @@ static inline struct i40iw_qp *to_iwqp(struct ib_qp *ibqp)
}
/* i40iw.c */
-void i40iw_add_ref(struct ib_qp *);
-void i40iw_rem_ref(struct ib_qp *);
+void i40iw_qp_add_ref(struct ib_qp *ibqp);
+void i40iw_qp_rem_ref(struct ib_qp *ibqp);
struct ib_qp *i40iw_get_qp(struct ib_device *, int);
void i40iw_flush_wqes(struct i40iw_device *iwdev,
@@ -554,9 +554,8 @@ enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev,
bool wait);
void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf);
void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp);
-void i40iw_free_qp_resources(struct i40iw_device *iwdev,
- struct i40iw_qp *iwqp,
- u32 qp_num);
+void i40iw_free_qp_resources(struct i40iw_qp *iwqp);
+
enum i40iw_status_code i40iw_obj_aligned_mem(struct i40iw_device *iwdev,
struct i40iw_dma_mem *memptr,
u32 size, u32 mask);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index a3b95805c154..3053c345a5a3 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -2322,7 +2322,7 @@ static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node)
iwqp = cm_node->iwqp;
if (iwqp) {
iwqp->cm_node = NULL;
- i40iw_rem_ref(&iwqp->ibqp);
+ i40iw_qp_rem_ref(&iwqp->ibqp);
cm_node->iwqp = NULL;
} else if (cm_node->qhash_set) {
i40iw_get_addr_info(cm_node, &nfo);
@@ -3452,7 +3452,7 @@ void i40iw_cm_disconn(struct i40iw_qp *iwqp)
kfree(work);
return;
}
- i40iw_add_ref(&iwqp->ibqp);
+ i40iw_qp_add_ref(&iwqp->ibqp);
spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
work->iwqp = iwqp;
@@ -3623,7 +3623,7 @@ static void i40iw_disconnect_worker(struct work_struct *work)
kfree(dwork);
i40iw_cm_disconn_true(iwqp);
- i40iw_rem_ref(&iwqp->ibqp);
+ i40iw_qp_rem_ref(&iwqp->ibqp);
}
/**
@@ -3745,7 +3745,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
cm_node->lsmm_size = accept.size + conn_param->private_data_len;
i40iw_cm_init_tsa_conn(iwqp, cm_node);
cm_id->add_ref(cm_id);
- i40iw_add_ref(&iwqp->ibqp);
+ i40iw_qp_add_ref(&iwqp->ibqp);
attr.qp_state = IB_QPS_RTS;
cm_node->qhash_set = false;
@@ -3908,7 +3908,7 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
iwqp->cm_node = cm_node;
cm_node->iwqp = iwqp;
iwqp->cm_id = cm_id;
- i40iw_add_ref(&iwqp->ibqp);
+ i40iw_qp_add_ref(&iwqp->ibqp);
if (cm_node->state != I40IW_CM_STATE_OFFLOADED) {
cm_node->state = I40IW_CM_STATE_SYN_SENT;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
index e1085634b8d9..56fdc161f6f8 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_hw.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c
@@ -313,7 +313,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
__func__, info->qp_cq_id);
continue;
}
- i40iw_add_ref(&iwqp->ibqp);
+ i40iw_qp_add_ref(&iwqp->ibqp);
spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
qp = &iwqp->sc_qp;
spin_lock_irqsave(&iwqp->lock, flags);
@@ -426,7 +426,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
break;
}
if (info->qp)
- i40iw_rem_ref(&iwqp->ibqp);
+ i40iw_qp_rem_ref(&iwqp->ibqp);
} while (1);
if (aeqcnt)
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index 58a433135a03..2408b279e4c2 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -192,9 +192,9 @@ static void i40iw_enable_intr(struct i40iw_sc_dev *dev, u32 msix_id)
* i40iw_dpc - tasklet for aeq and ceq 0
* @data: iwarp device
*/
-static void i40iw_dpc(unsigned long data)
+static void i40iw_dpc(struct tasklet_struct *t)
{
- struct i40iw_device *iwdev = (struct i40iw_device *)data;
+ struct i40iw_device *iwdev = from_tasklet(iwdev, t, dpc_tasklet);
if (iwdev->msix_shared)
i40iw_process_ceq(iwdev, iwdev->ceqlist);
@@ -206,9 +206,9 @@ static void i40iw_dpc(unsigned long data)
* i40iw_ceq_dpc - dpc handler for CEQ
* @data: data points to CEQ
*/
-static void i40iw_ceq_dpc(unsigned long data)
+static void i40iw_ceq_dpc(struct tasklet_struct *t)
{
- struct i40iw_ceq *iwceq = (struct i40iw_ceq *)data;
+ struct i40iw_ceq *iwceq = from_tasklet(iwceq, t, dpc_tasklet);
struct i40iw_device *iwdev = iwceq->iwdev;
i40iw_process_ceq(iwdev, iwceq);
@@ -689,10 +689,10 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw
enum i40iw_status_code status;
if (iwdev->msix_shared && !ceq_id) {
- tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev);
+ tasklet_setup(&iwdev->dpc_tasklet, i40iw_dpc);
status = request_irq(msix_vec->irq, i40iw_irq_handler, 0, "AEQCEQ", iwdev);
} else {
- tasklet_init(&iwceq->dpc_tasklet, i40iw_ceq_dpc, (unsigned long)iwceq);
+ tasklet_setup(&iwceq->dpc_tasklet, i40iw_ceq_dpc);
status = request_irq(msix_vec->irq, i40iw_ceq_handler, 0, "CEQ", iwceq);
}
@@ -841,7 +841,7 @@ static enum i40iw_status_code i40iw_configure_aeq_vector(struct i40iw_device *iw
u32 ret = 0;
if (!iwdev->msix_shared) {
- tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev);
+ tasklet_setup(&iwdev->dpc_tasklet, i40iw_dpc);
ret = request_irq(msix_vec->irq, i40iw_irq_handler, 0, "i40iw", iwdev);
}
if (ret) {
@@ -1573,7 +1573,7 @@ static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl,
status = i40iw_save_msix_info(iwdev, ldev);
if (status)
return status;
- iwdev->hw.dev_context = (void *)ldev->pcidev;
+ iwdev->hw.pcidev = ldev->pcidev;
iwdev->hw.hw_addr = ldev->hw_addr;
status = i40iw_allocate_dma_mem(&iwdev->hw,
&iwdev->obj_mem, 8192, 4096);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.c b/drivers/infiniband/hw/i40iw/i40iw_pble.c
index 540aab5e502d..5f97643e22e5 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_pble.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_pble.c
@@ -167,7 +167,7 @@ static enum i40iw_status_code add_sd_direct(struct i40iw_sc_dev *dev,
*/
static void i40iw_free_vmalloc_mem(struct i40iw_hw *hw, struct i40iw_chunk *chunk)
{
- struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+ struct pci_dev *pcidev = hw->pcidev;
int i;
if (!chunk->pg_cnt)
@@ -193,7 +193,7 @@ static enum i40iw_status_code i40iw_get_vmalloc_mem(struct i40iw_hw *hw,
struct i40iw_chunk *chunk,
int pg_cnt)
{
- struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+ struct pci_dev *pcidev = hw->pcidev;
struct page *page;
u8 *addr;
u32 size;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h
index 54c323c40d96..c3babf3cbb8e 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_type.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_type.h
@@ -73,6 +73,7 @@ struct i40iw_pd_ops;
struct i40iw_priv_qp_ops;
struct i40iw_priv_cq_ops;
struct i40iw_hmc_ops;
+struct pci_dev;
enum i40iw_page_size {
I40IW_PAGE_SIZE_4K,
@@ -261,7 +262,7 @@ struct i40iw_vsi_pestat {
struct i40iw_hw {
u8 __iomem *hw_addr;
- void *dev_context;
+ struct pci_dev *pcidev;
struct i40iw_hmc_info hmc;
};
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
index e07fb37af086..644f8c641aa0 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -478,25 +478,6 @@ void i40iw_cleanup_pending_cqp_op(struct i40iw_device *iwdev)
}
/**
- * i40iw_free_qp - callback after destroy cqp completes
- * @cqp_request: cqp request for destroy qp
- * @num: not used
- */
-static void i40iw_free_qp(struct i40iw_cqp_request *cqp_request, u32 num)
-{
- struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)cqp_request->param;
- struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp;
- struct i40iw_device *iwdev;
- u32 qp_num = iwqp->ibqp.qp_num;
-
- iwdev = iwqp->iwdev;
-
- i40iw_rem_pdusecount(iwqp->iwpd, iwdev);
- i40iw_free_qp_resources(iwdev, iwqp, qp_num);
- i40iw_rem_devusecount(iwdev);
-}
-
-/**
* i40iw_wait_event - wait for completion
* @iwdev: iwarp device
* @cqp_request: cqp request to wait
@@ -616,26 +597,23 @@ void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev)
}
/**
- * i40iw_add_ref - add refcount for qp
+ * i40iw_qp_add_ref - add refcount for qp
* @ibqp: iqarp qp
*/
-void i40iw_add_ref(struct ib_qp *ibqp)
+void i40iw_qp_add_ref(struct ib_qp *ibqp)
{
struct i40iw_qp *iwqp = (struct i40iw_qp *)ibqp;
- atomic_inc(&iwqp->refcount);
+ refcount_inc(&iwqp->refcount);
}
/**
- * i40iw_rem_ref - rem refcount for qp and free if 0
+ * i40iw_qp_rem_ref - rem refcount for qp and free if 0
* @ibqp: iqarp qp
*/
-void i40iw_rem_ref(struct ib_qp *ibqp)
+void i40iw_qp_rem_ref(struct ib_qp *ibqp)
{
struct i40iw_qp *iwqp;
- enum i40iw_status_code status;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
struct i40iw_device *iwdev;
u32 qp_num;
unsigned long flags;
@@ -643,7 +621,7 @@ void i40iw_rem_ref(struct ib_qp *ibqp)
iwqp = to_iwqp(ibqp);
iwdev = iwqp->iwdev;
spin_lock_irqsave(&iwdev->qptable_lock, flags);
- if (!atomic_dec_and_test(&iwqp->refcount)) {
+ if (!refcount_dec_and_test(&iwqp->refcount)) {
spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
return;
}
@@ -651,25 +629,8 @@ void i40iw_rem_ref(struct ib_qp *ibqp)
qp_num = iwqp->ibqp.qp_num;
iwdev->qp_table[qp_num] = NULL;
spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
- if (!cqp_request)
- return;
-
- cqp_request->callback_fcn = i40iw_free_qp;
- cqp_request->param = (void *)&iwqp->sc_qp;
- cqp_info = &cqp_request->info;
- cqp_info->cqp_cmd = OP_QP_DESTROY;
- cqp_info->post_sq = 1;
- cqp_info->in.u.qp_destroy.qp = &iwqp->sc_qp;
- cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request;
- cqp_info->in.u.qp_destroy.remove_hash_idx = true;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (!status)
- return;
+ complete(&iwqp->free_qp);
- i40iw_rem_pdusecount(iwqp->iwpd, iwdev);
- i40iw_free_qp_resources(iwdev, iwqp, qp_num);
- i40iw_rem_devusecount(iwdev);
}
/**
@@ -751,7 +712,7 @@ enum i40iw_status_code i40iw_allocate_dma_mem(struct i40iw_hw *hw,
u64 size,
u32 alignment)
{
- struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+ struct pci_dev *pcidev = hw->pcidev;
if (!mem)
return I40IW_ERR_PARAM;
@@ -770,7 +731,7 @@ enum i40iw_status_code i40iw_allocate_dma_mem(struct i40iw_hw *hw,
*/
void i40iw_free_dma_mem(struct i40iw_hw *hw, struct i40iw_dma_mem *mem)
{
- struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+ struct pci_dev *pcidev = hw->pcidev;
if (!mem || !mem->va)
return;
@@ -936,7 +897,7 @@ static void i40iw_terminate_timeout(struct timer_list *t)
struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)&iwqp->sc_qp;
i40iw_terminate_done(qp, 1);
- i40iw_rem_ref(&iwqp->ibqp);
+ i40iw_qp_rem_ref(&iwqp->ibqp);
}
/**
@@ -948,7 +909,7 @@ void i40iw_terminate_start_timer(struct i40iw_sc_qp *qp)
struct i40iw_qp *iwqp;
iwqp = (struct i40iw_qp *)qp->back_qp;
- i40iw_add_ref(&iwqp->ibqp);
+ i40iw_qp_add_ref(&iwqp->ibqp);
timer_setup(&iwqp->terminate_timer, i40iw_terminate_timeout, 0);
iwqp->terminate_timer.expires = jiffies + HZ;
add_timer(&iwqp->terminate_timer);
@@ -964,7 +925,7 @@ void i40iw_terminate_del_timer(struct i40iw_sc_qp *qp)
iwqp = (struct i40iw_qp *)qp->back_qp;
if (del_timer(&iwqp->terminate_timer))
- i40iw_rem_ref(&iwqp->ibqp);
+ i40iw_qp_rem_ref(&iwqp->ibqp);
}
/**
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index b51339328a51..581ecbadf586 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -328,12 +328,13 @@ error:
* @ibpd: ptr of pd to be deallocated
* @udata: user data or null for kernel object
*/
-static void i40iw_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+static int i40iw_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct i40iw_pd *iwpd = to_iwpd(ibpd);
struct i40iw_device *iwdev = to_iwdev(ibpd->device);
i40iw_rem_pdusecount(iwpd, iwdev);
+ return 0;
}
/**
@@ -363,11 +364,11 @@ static struct i40iw_pbl *i40iw_get_pbl(unsigned long va,
* @iwqp: qp ptr (user or kernel)
* @qp_num: qp number assigned
*/
-void i40iw_free_qp_resources(struct i40iw_device *iwdev,
- struct i40iw_qp *iwqp,
- u32 qp_num)
+void i40iw_free_qp_resources(struct i40iw_qp *iwqp)
{
struct i40iw_pbl *iwpbl = &iwqp->iwpbl;
+ struct i40iw_device *iwdev = iwqp->iwdev;
+ u32 qp_num = iwqp->ibqp.qp_num;
i40iw_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp);
i40iw_dealloc_push_page(iwdev, &iwqp->sc_qp);
@@ -379,7 +380,7 @@ void i40iw_free_qp_resources(struct i40iw_device *iwdev,
i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->kqp.dma_mem);
kfree(iwqp->kqp.wrid_mem);
iwqp->kqp.wrid_mem = NULL;
- kfree(iwqp->allocated_buffer);
+ kfree(iwqp);
}
/**
@@ -401,6 +402,10 @@ static void i40iw_clean_cqes(struct i40iw_qp *iwqp, struct i40iw_cq *iwcq)
static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
struct i40iw_qp *iwqp = to_iwqp(ibqp);
+ struct ib_qp_attr attr;
+ struct i40iw_device *iwdev = iwqp->iwdev;
+
+ memset(&attr, 0, sizeof(attr));
iwqp->destroyed = 1;
@@ -415,7 +420,15 @@ static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
}
}
- i40iw_rem_ref(&iwqp->ibqp);
+ attr.qp_state = IB_QPS_ERR;
+ i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+ i40iw_qp_rem_ref(&iwqp->ibqp);
+ wait_for_completion(&iwqp->free_qp);
+ i40iw_cqp_qp_destroy_cmd(&iwdev->sc_dev, &iwqp->sc_qp);
+ i40iw_rem_pdusecount(iwqp->iwpd, iwdev);
+ i40iw_free_qp_resources(iwqp);
+ i40iw_rem_devusecount(iwdev);
+
return 0;
}
@@ -524,7 +537,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
struct i40iw_create_qp_req req;
struct i40iw_create_qp_resp uresp;
u32 qp_num = 0;
- void *mem;
enum i40iw_status_code ret;
int err_code;
int sq_size;
@@ -566,16 +578,15 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge;
init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data;
- mem = kzalloc(sizeof(*iwqp), GFP_KERNEL);
- if (!mem)
+ iwqp = kzalloc(sizeof(*iwqp), GFP_KERNEL);
+ if (!iwqp)
return ERR_PTR(-ENOMEM);
- iwqp = (struct i40iw_qp *)mem;
- iwqp->allocated_buffer = mem;
qp = &iwqp->sc_qp;
qp->back_qp = (void *)iwqp;
qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX;
+ iwqp->iwdev = iwdev;
iwqp->ctx_info.iwarp_info = &iwqp->iwarp_info;
if (i40iw_allocate_dma_mem(dev->hw,
@@ -600,7 +611,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
goto error;
}
- iwqp->iwdev = iwdev;
iwqp->iwpd = iwpd;
iwqp->ibqp.qp_num = qp_num;
qp = &iwqp->sc_qp;
@@ -714,7 +724,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
goto error;
}
- i40iw_add_ref(&iwqp->ibqp);
+ refcount_set(&iwqp->refcount, 1);
spin_lock_init(&iwqp->lock);
iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
iwdev->qp_table[qp_num] = iwqp;
@@ -736,10 +746,11 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
}
init_completion(&iwqp->sq_drained);
init_completion(&iwqp->rq_drained);
+ init_completion(&iwqp->free_qp);
return &iwqp->ibqp;
error:
- i40iw_free_qp_resources(iwdev, iwqp, qp_num);
+ i40iw_free_qp_resources(iwqp);
return ERR_PTR(err_code);
}
@@ -1052,7 +1063,7 @@ void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq)
* @ib_cq: cq pointer
* @udata: user data or NULL for kernel object
*/
-static void i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
+static int i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
struct i40iw_cq *iwcq;
struct i40iw_device *iwdev;
@@ -1064,6 +1075,7 @@ static void i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
i40iw_cq_wq_destroy(iwdev, cq);
cq_free_resources(iwdev, iwcq);
i40iw_rem_devusecount(iwdev);
+ return 0;
}
/**
@@ -1320,8 +1332,7 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr,
if (iwmr->type == IW_MEMREG_TYPE_QP)
iwpbl->qp_mr.sq_page = sg_page(region->sg_head.sgl);
- rdma_for_each_block(region->sg_head.sgl, &biter, region->nmap,
- iwmr->page_size) {
+ rdma_umem_for_each_dma_block(region, &biter, iwmr->page_size) {
*pbl = rdma_block_iter_dma_address(&biter);
pbl = i40iw_next_pbl_addr(pbl, &pinfo, &idx);
}
@@ -1744,15 +1755,12 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
struct i40iw_mr *iwmr;
struct ib_umem *region;
struct i40iw_mem_reg_req req;
- u64 pbl_depth = 0;
u32 stag = 0;
u16 access;
- u64 region_length;
bool use_pbles = false;
unsigned long flags;
int err = -ENOSYS;
int ret;
- int pg_shift;
if (!udata)
return ERR_PTR(-EOPNOTSUPP);
@@ -1787,18 +1795,13 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
if (req.reg_type == IW_MEMREG_TYPE_MEM)
iwmr->page_size = ib_umem_find_best_pgsz(region, SZ_4K | SZ_2M,
virt);
-
- region_length = region->length + (start & (iwmr->page_size - 1));
- pg_shift = ffs(iwmr->page_size) - 1;
- pbl_depth = region_length >> pg_shift;
- pbl_depth += (region_length & (iwmr->page_size - 1)) ? 1 : 0;
iwmr->length = region->length;
iwpbl->user_base = virt;
palloc = &iwpbl->pble_alloc;
iwmr->type = req.reg_type;
- iwmr->page_cnt = (u32)pbl_depth;
+ iwmr->page_cnt = ib_umem_num_dma_blocks(region, iwmr->page_size);
switch (req.reg_type) {
case IW_MEMREG_TYPE_QP:
@@ -2636,13 +2639,13 @@ static const struct ib_device_ops i40iw_dev_ops = {
.get_hw_stats = i40iw_get_hw_stats,
.get_port_immutable = i40iw_port_immutable,
.iw_accept = i40iw_accept,
- .iw_add_ref = i40iw_add_ref,
+ .iw_add_ref = i40iw_qp_add_ref,
.iw_connect = i40iw_connect,
.iw_create_listen = i40iw_create_listen,
.iw_destroy_listen = i40iw_destroy_listen,
.iw_get_qp = i40iw_get_qp,
.iw_reject = i40iw_reject,
- .iw_rem_ref = i40iw_rem_ref,
+ .iw_rem_ref = i40iw_qp_rem_ref,
.map_mr_sg = i40iw_map_mr_sg,
.mmap = i40iw_mmap,
.modify_qp = i40iw_modify_qp,
@@ -2668,7 +2671,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
{
struct i40iw_ib_device *iwibdev;
struct net_device *netdev = iwdev->netdev;
- struct pci_dev *pcidev = (struct pci_dev *)iwdev->hw.dev_context;
+ struct pci_dev *pcidev = iwdev->hw.pcidev;
iwibdev = ib_alloc_device(i40iw_ib_device, ibdev);
if (!iwibdev) {
@@ -2758,7 +2761,8 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev)
if (ret)
goto error;
- ret = ib_register_device(&iwibdev->ibdev, "i40iw%d");
+ dma_set_max_seg_size(&iwdev->hw.pcidev->dev, UINT_MAX);
+ ret = ib_register_device(&iwibdev->ibdev, "i40iw%d", &iwdev->hw.pcidev->dev);
if (ret)
goto error;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
index 331bc21cbcc7..bab71f3e5637 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
@@ -139,7 +139,7 @@ struct i40iw_qp {
struct i40iw_qp_host_ctx_info ctx_info;
struct i40iwarp_offload_info iwarp_info;
void *allocated_buffer;
- atomic_t refcount;
+ refcount_t refcount;
struct iw_cm_id *cm_id;
void *cm_node;
struct ib_mr *lsmm_mr;
@@ -174,5 +174,6 @@ struct i40iw_qp {
struct i40iw_dma_mem ietf_mem;
struct completion sq_drained;
struct completion rq_drained;
+ struct completion free_qp;
};
#endif
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 5f8f8d5c0ce0..7321d6ab5fe1 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -232,8 +232,3 @@ int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
return 0;
}
-
-void mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags)
-{
- return;
-}
diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
index b591861934b3..4aff1c8298b1 100644
--- a/drivers/infiniband/hw/mlx4/cm.c
+++ b/drivers/infiniband/hw/mlx4/cm.c
@@ -54,11 +54,20 @@ struct id_map_entry {
struct delayed_work timeout;
};
+struct rej_tmout_entry {
+ int slave;
+ u32 rem_pv_cm_id;
+ struct delayed_work timeout;
+ struct xarray *xa_rej_tmout;
+};
+
struct cm_generic_msg {
struct ib_mad_hdr hdr;
__be32 local_comm_id;
__be32 remote_comm_id;
+ unsigned char unused[2];
+ __be16 rej_reason;
};
struct cm_sidr_generic_msg {
@@ -280,11 +289,15 @@ static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id)
if (!sriov->is_going_down && !id->scheduled_delete) {
id->scheduled_delete = 1;
schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
+ } else if (id->scheduled_delete) {
+ /* Adjust timeout if already scheduled */
+ mod_delayed_work(system_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
}
spin_unlock_irqrestore(&sriov->going_down_lock, flags);
spin_unlock(&sriov->id_map_lock);
}
+#define REJ_REASON(m) be16_to_cpu(((struct cm_generic_msg *)(m))->rej_reason)
int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,
struct ib_mad *mad)
{
@@ -293,8 +306,10 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id
int pv_cm_id = -1;
if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
- mad->mad_hdr.attr_id == CM_REP_ATTR_ID ||
- mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
+ mad->mad_hdr.attr_id == CM_REP_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_MRA_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID ||
+ (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID && REJ_REASON(mad) == IB_CM_REJ_TIMEOUT)) {
sl_cm_id = get_local_comm_id(mad);
id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id);
if (id)
@@ -314,8 +329,8 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id
}
if (!id) {
- pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL!\n",
- slave_id, sl_cm_id);
+ pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL! attr_id: 0x%x\n",
+ slave_id, sl_cm_id, be16_to_cpu(mad->mad_hdr.attr_id));
return -EINVAL;
}
@@ -327,11 +342,94 @@ cont:
return 0;
}
+static void rej_tmout_timeout(struct work_struct *work)
+{
+ struct delayed_work *delay = to_delayed_work(work);
+ struct rej_tmout_entry *item = container_of(delay, struct rej_tmout_entry, timeout);
+ struct rej_tmout_entry *deleted;
+
+ deleted = xa_cmpxchg(item->xa_rej_tmout, item->rem_pv_cm_id, item, NULL, 0);
+
+ if (deleted != item)
+ pr_debug("deleted(%p) != item(%p)\n", deleted, item);
+
+ kfree(item);
+}
+
+static int alloc_rej_tmout(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id, int slave)
+{
+ struct rej_tmout_entry *item;
+ struct rej_tmout_entry *old;
+ int ret = 0;
+
+ xa_lock(&sriov->xa_rej_tmout);
+ item = xa_load(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id);
+
+ if (item) {
+ if (xa_err(item))
+ ret = xa_err(item);
+ else
+ /* If a retry, adjust delayed work */
+ mod_delayed_work(system_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT);
+ goto err_or_exists;
+ }
+ xa_unlock(&sriov->xa_rej_tmout);
+
+ item = kmalloc(sizeof(*item), GFP_KERNEL);
+ if (!item)
+ return -ENOMEM;
+
+ INIT_DELAYED_WORK(&item->timeout, rej_tmout_timeout);
+ item->slave = slave;
+ item->rem_pv_cm_id = rem_pv_cm_id;
+ item->xa_rej_tmout = &sriov->xa_rej_tmout;
+
+ old = xa_cmpxchg(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id, NULL, item, GFP_KERNEL);
+ if (old) {
+ pr_debug(
+ "Non-null old entry (%p) or error (%d) when inserting\n",
+ old, xa_err(old));
+ kfree(item);
+ return xa_err(old);
+ }
+
+ schedule_delayed_work(&item->timeout, CM_CLEANUP_CACHE_TIMEOUT);
+
+ return 0;
+
+err_or_exists:
+ xa_unlock(&sriov->xa_rej_tmout);
+ return ret;
+}
+
+static int lookup_rej_tmout_slave(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id)
+{
+ struct rej_tmout_entry *item;
+ int slave;
+
+ xa_lock(&sriov->xa_rej_tmout);
+ item = xa_load(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id);
+
+ if (!item || xa_err(item)) {
+ pr_debug("Could not find slave. rem_pv_cm_id 0x%x error: %d\n",
+ rem_pv_cm_id, xa_err(item));
+ slave = !item ? -ENOENT : xa_err(item);
+ } else {
+ slave = item->slave;
+ }
+ xa_unlock(&sriov->xa_rej_tmout);
+
+ return slave;
+}
+
int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
struct ib_mad *mad)
{
+ struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
+ u32 rem_pv_cm_id = get_local_comm_id(mad);
u32 pv_cm_id;
struct id_map_entry *id;
+ int sts;
if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
@@ -347,6 +445,13 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
be64_to_cpu(gid.global.interface_id));
return -ENOENT;
}
+
+ sts = alloc_rej_tmout(sriov, rem_pv_cm_id, *slave);
+ if (sts)
+ /* Even if this fails, we pass on the REQ to the slave */
+ pr_debug("Could not allocate rej_tmout entry. rem_pv_cm_id 0x%x slave %d status %d\n",
+ rem_pv_cm_id, *slave, sts);
+
return 0;
}
@@ -354,7 +459,14 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
id = id_map_get(ibdev, (int *)&pv_cm_id, -1, -1);
if (!id) {
- pr_debug("Couldn't find an entry for pv_cm_id 0x%x\n", pv_cm_id);
+ if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID &&
+ REJ_REASON(mad) == IB_CM_REJ_TIMEOUT && slave) {
+ *slave = lookup_rej_tmout_slave(sriov, rem_pv_cm_id);
+
+ return (*slave < 0) ? *slave : 0;
+ }
+ pr_debug("Couldn't find an entry for pv_cm_id 0x%x, attr_id 0x%x\n",
+ pv_cm_id, be16_to_cpu(mad->mad_hdr.attr_id));
return -ENOENT;
}
@@ -375,6 +487,34 @@ void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev)
INIT_LIST_HEAD(&dev->sriov.cm_list);
dev->sriov.sl_id_map = RB_ROOT;
xa_init_flags(&dev->sriov.pv_id_table, XA_FLAGS_ALLOC);
+ xa_init(&dev->sriov.xa_rej_tmout);
+}
+
+static void rej_tmout_xa_cleanup(struct mlx4_ib_sriov *sriov, int slave)
+{
+ struct rej_tmout_entry *item;
+ bool flush_needed = false;
+ unsigned long id;
+ int cnt = 0;
+
+ xa_lock(&sriov->xa_rej_tmout);
+ xa_for_each(&sriov->xa_rej_tmout, id, item) {
+ if (slave < 0 || slave == item->slave) {
+ mod_delayed_work(system_wq, &item->timeout, 0);
+ flush_needed = true;
+ ++cnt;
+ }
+ }
+ xa_unlock(&sriov->xa_rej_tmout);
+
+ if (flush_needed) {
+ flush_scheduled_work();
+ pr_debug("Deleted %d entries in xarray for slave %d during cleanup\n",
+ cnt, slave);
+ }
+
+ if (slave < 0)
+ WARN_ON(!xa_empty(&sriov->xa_rej_tmout));
}
/* slave = -1 ==> all slaves */
@@ -444,4 +584,6 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
list_del(&map->list);
kfree(map);
}
+
+ rej_tmout_xa_cleanup(sriov, slave);
}
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 8a3436994f80..e9b5a4d57fb1 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -149,7 +149,6 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_udata *udata,
if (IS_ERR(*umem))
return PTR_ERR(*umem);
- n = ib_umem_page_count(*umem);
shift = mlx4_ib_umem_calc_optimal_mtt_size(*umem, 0, &n);
err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt);
@@ -475,7 +474,7 @@ out:
return err;
}
-void mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(cq->device);
struct mlx4_ib_cq *mcq = to_mcq(cq);
@@ -495,6 +494,7 @@ void mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
mlx4_db_free(dev->dev, &mcq->db);
}
ib_umem_release(mcq->umem);
+ return 0;
}
static void dump_cqe(void *cqe)
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index abe68708d6d6..8bd16474708f 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -500,6 +500,13 @@ static int get_gids_from_l3_hdr(struct ib_grh *grh, union ib_gid *sgid,
sgid, dgid);
}
+static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
+{
+ int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave;
+
+ return (qpn >= proxy_start && qpn <= proxy_start + 1);
+}
+
int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type dest_qpt, struct ib_wc *wc,
struct ib_grh *grh, struct ib_mad *mad)
@@ -520,8 +527,10 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
u16 cached_pkey;
u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
- if (dest_qpt > IB_QPT_GSI)
+ if (dest_qpt > IB_QPT_GSI) {
+ pr_debug("dest_qpt (%d) > IB_QPT_GSI\n", dest_qpt);
return -EINVAL;
+ }
tun_ctx = dev->sriov.demux[port-1].tun[slave];
@@ -538,12 +547,20 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
if (dest_qpt) {
u16 pkey_ix;
ret = ib_get_cached_pkey(&dev->ib_dev, port, wc->pkey_index, &cached_pkey);
- if (ret)
+ if (ret) {
+ pr_debug("unable to get %s cached pkey for index %d, ret %d\n",
+ is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
+ wc->pkey_index, ret);
return -EINVAL;
+ }
ret = find_slave_port_pkey_ix(dev, slave, port, cached_pkey, &pkey_ix);
- if (ret)
+ if (ret) {
+ pr_debug("unable to get %s pkey ix for pkey 0x%x, ret %d\n",
+ is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
+ cached_pkey, ret);
return -EINVAL;
+ }
tun_pkey_ix = pkey_ix;
} else
tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
@@ -715,7 +732,8 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
if (err)
- pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
+ pr_debug("failed sending %s to slave %d via tunnel qp (%d)\n",
+ is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
slave, err);
return 0;
}
@@ -794,7 +812,8 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
if (err)
- pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
+ pr_debug("failed sending %s to slave %d via tunnel qp (%d)\n",
+ is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
slave, err);
return 0;
}
@@ -807,27 +826,6 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
int err;
struct ib_port_attr pattr;
- if (in_wc && in_wc->qp) {
- pr_debug("received MAD: port:%d slid:%d sqpn:%d "
- "dlid_bits:%d dqpn:%d wc_flags:0x%x tid:%016llx cls:%x mtd:%x atr:%x\n",
- port_num,
- in_wc->slid, in_wc->src_qp,
- in_wc->dlid_path_bits,
- in_wc->qp->qp_num,
- in_wc->wc_flags,
- be64_to_cpu(in_mad->mad_hdr.tid),
- in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
- be16_to_cpu(in_mad->mad_hdr.attr_id));
- if (in_wc->wc_flags & IB_WC_GRH) {
- pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n",
- be64_to_cpu(in_grh->sgid.global.subnet_prefix),
- be64_to_cpu(in_grh->sgid.global.interface_id));
- pr_debug("dgid_hi:0x%016llx dgid_lo:0x%016llx\n",
- be64_to_cpu(in_grh->dgid.global.subnet_prefix),
- be64_to_cpu(in_grh->dgid.global.interface_id));
- }
- }
-
slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE);
if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) {
@@ -1299,6 +1297,18 @@ static void mlx4_ib_tunnel_comp_handler(struct ib_cq *cq, void *arg)
spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
}
+static void mlx4_ib_wire_comp_handler(struct ib_cq *cq, void *arg)
+{
+ unsigned long flags;
+ struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context;
+ struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
+
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE)
+ queue_work(ctx->wi_wq, &ctx->work);
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+}
+
static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx,
struct mlx4_ib_demux_pv_qp *tun_qp,
int index)
@@ -1341,14 +1351,6 @@ static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port,
return ret;
}
-static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
-{
- int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave;
-
- return (qpn >= proxy_start && qpn <= proxy_start + 1);
-}
-
-
int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type dest_qpt, u16 pkey_index,
u32 remote_qpn, u32 qkey, struct rdma_ah_attr *attr,
@@ -1401,10 +1403,10 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
spin_lock(&sqp->tx_lock);
if (sqp->tx_ix_head - sqp->tx_ix_tail >=
- (MLX4_NUM_TUNNEL_BUFS - 1))
+ (MLX4_NUM_WIRE_BUFS - 1))
ret = -EAGAIN;
else
- wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
+ wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_WIRE_BUFS - 1);
spin_unlock(&sqp->tx_lock);
if (ret)
goto out;
@@ -1484,6 +1486,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
u16 vlan_id;
u8 qos;
u8 *dmac;
+ int sts;
/* Get slave that sent this packet */
if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn ||
@@ -1580,13 +1583,17 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
&vlan_id, &qos))
rdma_ah_set_sl(&ah_attr, qos);
- mlx4_ib_send_to_wire(dev, slave, ctx->port,
- is_proxy_qp0(dev, wc->src_qp, slave) ?
- IB_QPT_SMI : IB_QPT_GSI,
- be16_to_cpu(tunnel->hdr.pkey_index),
- be32_to_cpu(tunnel->hdr.remote_qpn),
- be32_to_cpu(tunnel->hdr.qkey),
- &ah_attr, wc->smac, vlan_id, &tunnel->mad);
+ sts = mlx4_ib_send_to_wire(dev, slave, ctx->port,
+ is_proxy_qp0(dev, wc->src_qp, slave) ?
+ IB_QPT_SMI : IB_QPT_GSI,
+ be16_to_cpu(tunnel->hdr.pkey_index),
+ be32_to_cpu(tunnel->hdr.remote_qpn),
+ be32_to_cpu(tunnel->hdr.qkey),
+ &ah_attr, wc->smac, vlan_id, &tunnel->mad);
+ if (sts)
+ pr_debug("failed sending %s to wire on behalf of slave %d (%d)\n",
+ is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
+ slave, sts);
}
static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
@@ -1595,19 +1602,20 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
int i;
struct mlx4_ib_demux_pv_qp *tun_qp;
int rx_buf_size, tx_buf_size;
+ const int nmbr_bufs = is_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
if (qp_type > IB_QPT_GSI)
return -EINVAL;
tun_qp = &ctx->qp[qp_type];
- tun_qp->ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
+ tun_qp->ring = kcalloc(nmbr_bufs,
sizeof(struct mlx4_ib_buf),
GFP_KERNEL);
if (!tun_qp->ring)
return -ENOMEM;
- tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
+ tun_qp->tx_ring = kcalloc(nmbr_bufs,
sizeof (struct mlx4_ib_tun_tx_buf),
GFP_KERNEL);
if (!tun_qp->tx_ring) {
@@ -1624,7 +1632,7 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
}
- for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ for (i = 0; i < nmbr_bufs; i++) {
tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL);
if (!tun_qp->ring[i].addr)
goto err;
@@ -1638,7 +1646,7 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
}
}
- for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ for (i = 0; i < nmbr_bufs; i++) {
tun_qp->tx_ring[i].buf.addr =
kmalloc(tx_buf_size, GFP_KERNEL);
if (!tun_qp->tx_ring[i].buf.addr)
@@ -1669,7 +1677,7 @@ tx_err:
tx_buf_size, DMA_TO_DEVICE);
kfree(tun_qp->tx_ring[i].buf.addr);
}
- i = MLX4_NUM_TUNNEL_BUFS;
+ i = nmbr_bufs;
err:
while (i > 0) {
--i;
@@ -1690,6 +1698,7 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
int i;
struct mlx4_ib_demux_pv_qp *tun_qp;
int rx_buf_size, tx_buf_size;
+ const int nmbr_bufs = is_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
if (qp_type > IB_QPT_GSI)
return;
@@ -1704,13 +1713,13 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
}
- for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ for (i = 0; i < nmbr_bufs; i++) {
ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
rx_buf_size, DMA_FROM_DEVICE);
kfree(tun_qp->ring[i].addr);
}
- for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ for (i = 0; i < nmbr_bufs; i++) {
ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
tx_buf_size, DMA_TO_DEVICE);
kfree(tun_qp->tx_ring[i].buf.addr);
@@ -1744,9 +1753,6 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
"buf:%lld\n", wc.wr_id);
break;
case IB_WC_SEND:
- pr_debug("received tunnel send completion:"
- "wrid=0x%llx, status=0x%x\n",
- wc.wr_id, wc.status);
rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0);
tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
@@ -1793,6 +1799,7 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
struct mlx4_ib_qp_tunnel_init_attr qp_init_attr;
struct ib_qp_attr attr;
int qp_attr_mask_INIT;
+ const int nmbr_bufs = create_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
if (qp_type > IB_QPT_GSI)
return -EINVAL;
@@ -1803,8 +1810,8 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
qp_init_attr.init_attr.send_cq = ctx->cq;
qp_init_attr.init_attr.recv_cq = ctx->cq;
qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
- qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS;
- qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS;
+ qp_init_attr.init_attr.cap.max_send_wr = nmbr_bufs;
+ qp_init_attr.init_attr.cap.max_recv_wr = nmbr_bufs;
qp_init_attr.init_attr.cap.max_send_sge = 1;
qp_init_attr.init_attr.cap.max_recv_sge = 1;
if (create_tun) {
@@ -1866,7 +1873,7 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
goto err_qp;
}
- for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ for (i = 0; i < nmbr_bufs; i++) {
ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i);
if (ret) {
pr_err(" mlx4_ib_post_pv_buf error"
@@ -1902,8 +1909,8 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
switch (wc.opcode) {
case IB_WC_SEND:
kfree(sqp->tx_ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
- sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+ (MLX4_NUM_WIRE_BUFS - 1)].ah);
+ sqp->tx_ring[wc.wr_id & (MLX4_NUM_WIRE_BUFS - 1)].ah
= NULL;
spin_lock(&sqp->tx_lock);
sqp->tx_ix_tail++;
@@ -1912,13 +1919,13 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
case IB_WC_RECV:
mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *)
(sqp->ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->payload);
+ (MLX4_NUM_WIRE_BUFS - 1)].addr))->payload);
grh = &(((struct mlx4_mad_rcv_buf *)
(sqp->ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->grh);
+ (MLX4_NUM_WIRE_BUFS - 1)].addr))->grh);
mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad);
if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)))
+ (MLX4_NUM_WIRE_BUFS - 1)))
pr_err("Failed reposting SQP "
"buf:%lld\n", wc.wr_id);
break;
@@ -1931,8 +1938,8 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
ctx->slave, wc.status, wc.wr_id);
if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
kfree(sqp->tx_ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
- sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+ (MLX4_NUM_WIRE_BUFS - 1)].ah);
+ sqp->tx_ring[wc.wr_id & (MLX4_NUM_WIRE_BUFS - 1)].ah
= NULL;
spin_lock(&sqp->tx_lock);
sqp->tx_ix_tail++;
@@ -1972,6 +1979,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
{
int ret, cq_size;
struct ib_cq_init_attr cq_attr = {};
+ const int nmbr_bufs = create_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
if (ctx->state != DEMUX_PV_STATE_DOWN)
return -EEXIST;
@@ -1996,12 +2004,13 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
goto err_out_qp0;
}
- cq_size = 2 * MLX4_NUM_TUNNEL_BUFS;
+ cq_size = 2 * nmbr_bufs;
if (ctx->has_smi)
cq_size *= 2;
cq_attr.cqe = cq_size;
- ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler,
+ ctx->cq = ib_create_cq(ctx->ib_dev,
+ create_tun ? mlx4_ib_tunnel_comp_handler : mlx4_ib_wire_comp_handler,
NULL, ctx, &cq_attr);
if (IS_ERR(ctx->cq)) {
ret = PTR_ERR(ctx->cq);
@@ -2038,6 +2047,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker);
ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq;
+ ctx->wi_wq = to_mdev(ibdev)->sriov.demux[port - 1].wi_wq;
ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
if (ret) {
@@ -2181,7 +2191,7 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
goto err_mcg;
}
- snprintf(name, sizeof name, "mlx4_ibt%d", port);
+ snprintf(name, sizeof(name), "mlx4_ibt%d", port);
ctx->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
if (!ctx->wq) {
pr_err("Failed to create tunnelling WQ for port %d\n", port);
@@ -2189,7 +2199,15 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
goto err_wq;
}
- snprintf(name, sizeof name, "mlx4_ibud%d", port);
+ snprintf(name, sizeof(name), "mlx4_ibwi%d", port);
+ ctx->wi_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
+ if (!ctx->wi_wq) {
+ pr_err("Failed to create wire WQ for port %d\n", port);
+ ret = -ENOMEM;
+ goto err_wiwq;
+ }
+
+ snprintf(name, sizeof(name), "mlx4_ibud%d", port);
ctx->ud_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
if (!ctx->ud_wq) {
pr_err("Failed to create up/down WQ for port %d\n", port);
@@ -2200,6 +2218,10 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
return 0;
err_udwq:
+ destroy_workqueue(ctx->wi_wq);
+ ctx->wi_wq = NULL;
+
+err_wiwq:
destroy_workqueue(ctx->wq);
ctx->wq = NULL;
@@ -2247,12 +2269,14 @@ static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx)
ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING;
}
flush_workqueue(ctx->wq);
+ flush_workqueue(ctx->wi_wq);
for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0);
free_pv_object(dev, i, ctx->port);
}
kfree(ctx->tun);
destroy_workqueue(ctx->ud_wq);
+ destroy_workqueue(ctx->wi_wq);
destroy_workqueue(ctx->wq);
}
}
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index bd4f975e7f9a..cd0fba6b0964 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1215,9 +1215,10 @@ static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
return 0;
}
-static void mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+static int mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
+ return 0;
}
static int mlx4_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
@@ -1256,11 +1257,12 @@ err2:
return err;
}
-static void mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
+static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
{
ib_destroy_cq(to_mxrcd(xrcd)->cq);
ib_dealloc_pd(to_mxrcd(xrcd)->pd);
mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
+ return 0;
}
static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
@@ -1533,23 +1535,11 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
struct mlx4_net_trans_rule_hw_ctrl *ctrl;
int default_flow;
- static const u16 __mlx4_domain[] = {
- [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
- [IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL,
- [IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS,
- [IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC,
- };
-
if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) {
pr_err("Invalid priority value %d\n", flow_attr->priority);
return -EINVAL;
}
- if (domain >= IB_FLOW_DOMAIN_NUM) {
- pr_err("Invalid domain value %d\n", domain);
- return -EINVAL;
- }
-
if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0)
return -EINVAL;
@@ -1558,8 +1548,7 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
return PTR_ERR(mailbox);
ctrl = mailbox->buf;
- ctrl->prio = cpu_to_be16(__mlx4_domain[domain] |
- flow_attr->priority);
+ ctrl->prio = cpu_to_be16(domain | flow_attr->priority);
ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type);
ctrl->port = flow_attr->port;
ctrl->qpn = cpu_to_be32(qp->qp_num);
@@ -1701,8 +1690,8 @@ static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev,
}
static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
- struct ib_flow_attr *flow_attr,
- int domain, struct ib_udata *udata)
+ struct ib_flow_attr *flow_attr,
+ struct ib_udata *udata)
{
int err = 0, i = 0, j = 0;
struct mlx4_ib_flow *mflow;
@@ -1768,8 +1757,8 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
}
while (i < ARRAY_SIZE(type) && type[i]) {
- err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
- &mflow->reg_id[i].id);
+ err = __mlx4_ib_create_flow(qp, flow_attr, MLX4_DOMAIN_UVERBS,
+ type[i], &mflow->reg_id[i].id);
if (err)
goto err_create_flow;
if (is_bonded) {
@@ -1778,7 +1767,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
*/
flow_attr->port = 2;
err = __mlx4_ib_create_flow(qp, flow_attr,
- domain, type[j],
+ MLX4_DOMAIN_UVERBS, type[j],
&mflow->reg_id[j].mirror);
flow_attr->port = 1;
if (err)
@@ -2589,11 +2578,16 @@ static const struct ib_device_ops mlx4_ib_dev_wq_ops = {
.destroy_rwq_ind_table = mlx4_ib_destroy_rwq_ind_table,
.destroy_wq = mlx4_ib_destroy_wq,
.modify_wq = mlx4_ib_modify_wq,
+
+ INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx4_ib_rwq_ind_table,
+ ib_rwq_ind_tbl),
};
static const struct ib_device_ops mlx4_ib_dev_mw_ops = {
.alloc_mw = mlx4_ib_alloc_mw,
.dealloc_mw = mlx4_ib_dealloc_mw,
+
+ INIT_RDMA_OBJ_SIZE(ib_mw, mlx4_ib_mw, ibmw),
};
static const struct ib_device_ops mlx4_ib_dev_xrc_ops = {
@@ -2847,7 +2841,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
goto err_steer_free_bitmap;
rdma_set_device_sysfs_group(&ibdev->ib_dev, &mlx4_attr_group);
- if (ib_register_device(&ibdev->ib_dev, "mlx4_%d"))
+ if (ib_register_device(&ibdev->ib_dev, "mlx4_%d",
+ &dev->persist->pdev->dev))
goto err_diag_counters;
if (mlx4_ib_mad_init(ibdev))
@@ -2989,10 +2984,8 @@ int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
/* Add an empty rule for IB L2 */
memset(&ib_spec->mask, 0, sizeof(ib_spec->mask));
- err = __mlx4_ib_create_flow(&mqp->ibqp, flow,
- IB_FLOW_DOMAIN_NIC,
- MLX4_FS_REGULAR,
- &mqp->reg_id);
+ err = __mlx4_ib_create_flow(&mqp->ibqp, flow, MLX4_DOMAIN_NIC,
+ MLX4_FS_REGULAR, &mqp->reg_id);
} else {
err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
}
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 38e87a700a2a..58df06492d69 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -233,7 +233,8 @@ enum mlx4_ib_mad_ifc_flags {
};
enum {
- MLX4_NUM_TUNNEL_BUFS = 256,
+ MLX4_NUM_TUNNEL_BUFS = 512,
+ MLX4_NUM_WIRE_BUFS = 2048,
};
struct mlx4_ib_tunnel_header {
@@ -298,6 +299,26 @@ struct mlx4_ib_rss {
u8 rss_key[MLX4_EN_RSS_KEY_SIZE];
};
+enum {
+ /*
+ * Largest possible UD header: send with GRH and immediate
+ * data plus 18 bytes for an Ethernet header with VLAN/802.1Q
+ * tag. (LRH would only use 8 bytes, so Ethernet is the
+ * biggest case)
+ */
+ MLX4_IB_UD_HEADER_SIZE = 82,
+ MLX4_IB_LSO_HEADER_SPARE = 128,
+};
+
+struct mlx4_ib_sqp {
+ int pkey_index;
+ u32 qkey;
+ u32 send_psn;
+ struct ib_ud_header ud_header;
+ u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
+ struct ib_qp *roce_v2_gsi;
+};
+
struct mlx4_ib_qp {
union {
struct ib_qp ibqp;
@@ -343,7 +364,10 @@ struct mlx4_ib_qp {
struct mlx4_wqn_range *wqn_range;
/* Number of RSS QP parents that uses this WQ */
u32 rss_usecnt;
- struct mlx4_ib_rss *rss_ctx;
+ union {
+ struct mlx4_ib_rss *rss_ctx;
+ struct mlx4_ib_sqp *sqp;
+ };
};
struct mlx4_ib_srq {
@@ -366,6 +390,10 @@ struct mlx4_ib_ah {
union mlx4_ext_av av;
};
+struct mlx4_ib_rwq_ind_table {
+ struct ib_rwq_ind_table ib_rwq_ind_tbl;
+};
+
/****************************************/
/* alias guid support */
/****************************************/
@@ -454,6 +482,7 @@ struct mlx4_ib_demux_pv_ctx {
struct ib_pd *pd;
struct work_struct work;
struct workqueue_struct *wq;
+ struct workqueue_struct *wi_wq;
struct mlx4_ib_demux_pv_qp qp[2];
};
@@ -461,6 +490,7 @@ struct mlx4_ib_demux_ctx {
struct ib_device *ib_dev;
int port;
struct workqueue_struct *wq;
+ struct workqueue_struct *wi_wq;
struct workqueue_struct *ud_wq;
spinlock_t ud_lock;
atomic64_t subnet_prefix;
@@ -492,6 +522,7 @@ struct mlx4_ib_sriov {
spinlock_t id_map_lock;
struct rb_root sl_id_map;
struct list_head cm_list;
+ struct xarray xa_rej_tmout;
};
struct gid_cache_context {
@@ -725,8 +756,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata);
int mlx4_ib_dereg_mr(struct ib_mr *mr, struct ib_udata *udata);
-struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata);
+int mlx4_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
int mlx4_ib_dealloc_mw(struct ib_mw *mw);
struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg);
@@ -736,7 +766,7 @@ int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
-void mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
+int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
@@ -747,14 +777,17 @@ int mlx4_ib_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct rdma_ah_attr *ah_attr,
int slave_sgid_index, u8 *s_mac, u16 vlan_tag);
int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
-void mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags);
+static inline int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags)
+{
+ return 0;
+}
int mlx4_ib_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr,
struct ib_udata *udata);
int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
-void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
+int mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index);
int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr);
@@ -890,15 +923,18 @@ void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port);
struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata);
-void mlx4_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
+int mlx4_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
int mlx4_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
u32 wq_attr_mask, struct ib_udata *udata);
-struct ib_rwq_ind_table
-*mlx4_ib_create_rwq_ind_table(struct ib_device *device,
- struct ib_rwq_ind_table_init_attr *init_attr,
- struct ib_udata *udata);
-int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
+int mlx4_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata);
+static inline int
+mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table)
+{
+ return 0;
+}
int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
int *num_of_mtts);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 1d5ef0de12c9..426fed005d53 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -271,6 +271,8 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
u64 total_len = 0;
int i;
+ *num_of_mtts = ib_umem_num_dma_blocks(umem, PAGE_SIZE);
+
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
/*
* Initialization - save the first chunk start as the
@@ -421,7 +423,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto err_free;
}
- n = ib_umem_page_count(mr->umem);
shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n);
err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
@@ -511,7 +512,7 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
mmr->umem = NULL;
goto release_mpt_entry;
}
- n = ib_umem_page_count(mmr->umem);
+ n = ib_umem_num_dma_blocks(mmr->umem, PAGE_SIZE);
shift = PAGE_SHIFT;
err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
@@ -610,37 +611,27 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
return 0;
}
-struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata)
+int mlx4_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
{
- struct mlx4_ib_dev *dev = to_mdev(pd->device);
- struct mlx4_ib_mw *mw;
+ struct mlx4_ib_dev *dev = to_mdev(ibmw->device);
+ struct mlx4_ib_mw *mw = to_mmw(ibmw);
int err;
- mw = kmalloc(sizeof(*mw), GFP_KERNEL);
- if (!mw)
- return ERR_PTR(-ENOMEM);
-
- err = mlx4_mw_alloc(dev->dev, to_mpd(pd)->pdn,
- to_mlx4_type(type), &mw->mmw);
+ err = mlx4_mw_alloc(dev->dev, to_mpd(ibmw->pd)->pdn,
+ to_mlx4_type(ibmw->type), &mw->mmw);
if (err)
- goto err_free;
+ return err;
err = mlx4_mw_enable(dev->dev, &mw->mmw);
if (err)
goto err_mw;
- mw->ibmw.rkey = mw->mmw.key;
-
- return &mw->ibmw;
+ ibmw->rkey = mw->mmw.key;
+ return 0;
err_mw:
mlx4_mw_free(dev->dev, &mw->mmw);
-
-err_free:
- kfree(mw);
-
- return ERR_PTR(err);
+ return err;
}
int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
@@ -648,8 +639,6 @@ int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
struct mlx4_ib_mw *mw = to_mmw(ibmw);
mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw);
- kfree(mw);
-
return 0;
}
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 2975f350b9fd..5cb8e602294c 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -68,27 +68,6 @@ enum {
};
enum {
- /*
- * Largest possible UD header: send with GRH and immediate
- * data plus 18 bytes for an Ethernet header with VLAN/802.1Q
- * tag. (LRH would only use 8 bytes, so Ethernet is the
- * biggest case)
- */
- MLX4_IB_UD_HEADER_SIZE = 82,
- MLX4_IB_LSO_HEADER_SPARE = 128,
-};
-
-struct mlx4_ib_sqp {
- struct mlx4_ib_qp qp;
- int pkey_index;
- u32 qkey;
- u32 send_psn;
- struct ib_ud_header ud_header;
- u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
- struct ib_qp *roce_v2_gsi;
-};
-
-enum {
MLX4_IB_MIN_SQ_STRIDE = 6,
MLX4_IB_CACHE_LINE_SIZE = 64,
};
@@ -123,11 +102,6 @@ enum mlx4_ib_source_type {
MLX4_IB_RWQ_SRC = 1,
};
-static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
-{
- return container_of(mqp, struct mlx4_ib_sqp, qp);
-}
-
static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
if (!mlx4_is_master(dev->dev))
@@ -656,8 +630,6 @@ static int create_qp_rss(struct mlx4_ib_dev *dev,
if (err)
goto err_qpn;
- mutex_init(&qp->mutex);
-
INIT_LIST_HEAD(&qp->gid_list);
INIT_LIST_HEAD(&qp->steering_rules);
@@ -696,80 +668,72 @@ err_qpn:
return err;
}
-static struct ib_qp *_mlx4_ib_create_qp_rss(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+static int _mlx4_ib_create_qp_rss(struct ib_pd *pd, struct mlx4_ib_qp *qp,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
{
- struct mlx4_ib_qp *qp;
struct mlx4_ib_create_qp_rss ucmd = {};
size_t required_cmd_sz;
int err;
if (!udata) {
pr_debug("RSS QP with NULL udata\n");
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
if (udata->outlen)
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
required_cmd_sz = offsetof(typeof(ucmd), reserved1) +
sizeof(ucmd.reserved1);
if (udata->inlen < required_cmd_sz) {
pr_debug("invalid inlen\n");
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) {
pr_debug("copy failed\n");
- return ERR_PTR(-EFAULT);
+ return -EFAULT;
}
if (memchr_inv(ucmd.reserved, 0, sizeof(ucmd.reserved)))
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
if (ucmd.comp_mask || ucmd.reserved1)
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
if (udata->inlen > sizeof(ucmd) &&
!ib_is_udata_cleared(udata, sizeof(ucmd),
udata->inlen - sizeof(ucmd))) {
pr_debug("inlen is not supported\n");
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
}
if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
pr_debug("RSS QP with unsupported QP type %d\n",
init_attr->qp_type);
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
}
if (init_attr->create_flags) {
pr_debug("RSS QP doesn't support create flags\n");
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
}
if (init_attr->send_cq || init_attr->cap.max_send_wr) {
pr_debug("RSS QP with unsupported send attributes\n");
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
}
- qp = kzalloc(sizeof(*qp), GFP_KERNEL);
- if (!qp)
- return ERR_PTR(-ENOMEM);
-
qp->pri.vid = 0xFFFF;
qp->alt.vid = 0xFFFF;
err = create_qp_rss(to_mdev(pd->device), init_attr, &ucmd, qp);
- if (err) {
- kfree(qp);
- return ERR_PTR(err);
- }
+ if (err)
+ return err;
qp->ibqp.qp_num = qp->mqp.qpn;
-
- return &qp->ibqp;
+ return 0;
}
/*
@@ -873,7 +837,6 @@ static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
qp->mlx4_ib_qp_type = MLX4_IB_QPT_RAW_PACKET;
- mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
INIT_LIST_HEAD(&qp->gid_list);
@@ -922,7 +885,6 @@ static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
goto err;
}
- n = ib_umem_page_count(qp->umem);
shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n);
err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
@@ -989,13 +951,11 @@ err:
static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
struct ib_udata *udata, int sqpn,
- struct mlx4_ib_qp **caller_qp)
+ struct mlx4_ib_qp *qp)
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
int qpn;
int err;
- struct mlx4_ib_sqp *sqp = NULL;
- struct mlx4_ib_qp *qp;
struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context(
udata, struct mlx4_ib_ucontext, ibucontext);
enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
@@ -1043,27 +1003,18 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
sqpn = qpn;
}
- if (!*caller_qp) {
- if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
- (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
- MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
- sqp = kzalloc(sizeof(struct mlx4_ib_sqp), GFP_KERNEL);
- if (!sqp)
- return -ENOMEM;
- qp = &sqp->qp;
- } else {
- qp = kzalloc(sizeof(struct mlx4_ib_qp), GFP_KERNEL);
- if (!qp)
- return -ENOMEM;
- }
- qp->pri.vid = 0xFFFF;
- qp->alt.vid = 0xFFFF;
- } else
- qp = *caller_qp;
+ if (init_attr->qp_type == IB_QPT_SMI ||
+ init_attr->qp_type == IB_QPT_GSI || qp_type == MLX4_IB_QPT_SMI ||
+ qp_type == MLX4_IB_QPT_GSI ||
+ (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
+ MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
+ qp->sqp = kzalloc(sizeof(struct mlx4_ib_sqp), GFP_KERNEL);
+ if (!qp->sqp)
+ return -ENOMEM;
+ }
qp->mlx4_ib_qp_type = qp_type;
- mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
INIT_LIST_HEAD(&qp->gid_list);
@@ -1117,7 +1068,6 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
goto err;
}
- n = ib_umem_page_count(qp->umem);
shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n);
err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
@@ -1239,9 +1189,6 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
qp->mqp.event = mlx4_ib_qp_event;
- if (!*caller_qp)
- *caller_qp = qp;
-
spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
mlx4_ib_lock_cqs(to_mcq(init_attr->send_cq),
to_mcq(init_attr->recv_cq));
@@ -1293,10 +1240,7 @@ err_db:
mlx4_db_free(dev->dev, &qp->db);
err:
- if (!sqp && !*caller_qp)
- kfree(qp);
- kfree(sqp);
-
+ kfree(qp->sqp);
return err;
}
@@ -1410,7 +1354,6 @@ static void destroy_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
mlx4_qp_free(dev->dev, &qp->mqp);
mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
del_gid_entries(qp);
- kfree(qp->rss_ctx);
}
static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
@@ -1529,17 +1472,16 @@ static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
return dev->dev->caps.spec_qps[attr->port_num - 1].qp1_proxy;
}
-static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+static int _mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
{
- struct mlx4_ib_qp *qp = NULL;
int err;
int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
u16 xrcdn = 0;
if (init_attr->rwq_ind_tbl)
- return _mlx4_ib_create_qp_rss(pd, init_attr, udata);
+ return _mlx4_ib_create_qp_rss(pd, qp, init_attr, udata);
/*
* We only support LSO, vendor flag1, and multicast loopback blocking,
@@ -1551,16 +1493,16 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
MLX4_IB_SRIOV_SQP |
MLX4_IB_QP_NETIF |
MLX4_IB_QP_CREATE_ROCE_V2_GSI))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
if (init_attr->qp_type != IB_QPT_UD)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
if (init_attr->create_flags) {
if (udata && init_attr->create_flags & ~(sup_u_create_flags))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
MLX4_IB_QP_CREATE_ROCE_V2_GSI |
@@ -1570,7 +1512,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
init_attr->qp_type > IB_QPT_GSI) ||
(init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI &&
init_attr->qp_type != IB_QPT_GSI))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
switch (init_attr->qp_type) {
@@ -1581,53 +1523,43 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
fallthrough;
case IB_QPT_XRC_INI:
if (!(to_mdev(pd->device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
- return ERR_PTR(-ENOSYS);
+ return -ENOSYS;
init_attr->recv_cq = init_attr->send_cq;
fallthrough;
case IB_QPT_RC:
case IB_QPT_UC:
case IB_QPT_RAW_PACKET:
- qp = kzalloc(sizeof(*qp), GFP_KERNEL);
- if (!qp)
- return ERR_PTR(-ENOMEM);
+ case IB_QPT_UD:
qp->pri.vid = 0xFFFF;
qp->alt.vid = 0xFFFF;
- fallthrough;
- case IB_QPT_UD:
- {
- err = create_qp_common(pd, init_attr, udata, 0, &qp);
- if (err) {
- kfree(qp);
- return ERR_PTR(err);
- }
+ err = create_qp_common(pd, init_attr, udata, 0, qp);
+ if (err)
+ return err;
qp->ibqp.qp_num = qp->mqp.qpn;
qp->xrcdn = xrcdn;
-
break;
- }
case IB_QPT_SMI:
case IB_QPT_GSI:
{
int sqpn;
- /* Userspace is not allowed to create special QPs: */
- if (udata)
- return ERR_PTR(-EINVAL);
if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) {
int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev,
1, 1, &sqpn, 0,
MLX4_RES_USAGE_DRIVER);
if (res)
- return ERR_PTR(res);
+ return res;
} else {
sqpn = get_sqp_num(to_mdev(pd->device), init_attr);
}
- err = create_qp_common(pd, init_attr, udata, sqpn, &qp);
+ qp->pri.vid = 0xFFFF;
+ qp->alt.vid = 0xFFFF;
+ err = create_qp_common(pd, init_attr, udata, sqpn, qp);
if (err)
- return ERR_PTR(err);
+ return err;
qp->port = init_attr->port_num;
qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 :
@@ -1636,25 +1568,33 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
}
default:
/* Don't support raw QPs */
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
}
-
- return &qp->ibqp;
+ return 0;
}
struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata) {
struct ib_device *device = pd ? pd->device : init_attr->xrcd->device;
- struct ib_qp *ibqp;
struct mlx4_ib_dev *dev = to_mdev(device);
+ struct mlx4_ib_qp *qp;
+ int ret;
- ibqp = _mlx4_ib_create_qp(pd, init_attr, udata);
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ if (!qp)
+ return ERR_PTR(-ENOMEM);
- if (!IS_ERR(ibqp) &&
- (init_attr->qp_type == IB_QPT_GSI) &&
+ mutex_init(&qp->mutex);
+ ret = _mlx4_ib_create_qp(pd, qp, init_attr, udata);
+ if (ret) {
+ kfree(qp);
+ return ERR_PTR(ret);
+ }
+
+ if (init_attr->qp_type == IB_QPT_GSI &&
!(init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI)) {
- struct mlx4_ib_sqp *sqp = to_msqp((to_mqp(ibqp)));
+ struct mlx4_ib_sqp *sqp = qp->sqp;
int is_eth = rdma_cap_eth_ah(&dev->ib_dev, init_attr->port_num);
if (is_eth &&
@@ -1666,14 +1606,14 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
pr_err("Failed to create GSI QP for RoCEv2 (%ld)\n", PTR_ERR(sqp->roce_v2_gsi));
sqp->roce_v2_gsi = NULL;
} else {
- sqp = to_msqp(to_mqp(sqp->roce_v2_gsi));
- sqp->qp.flags |= MLX4_IB_ROCE_V2_GSI_QP;
+ to_mqp(sqp->roce_v2_gsi)->flags |=
+ MLX4_IB_ROCE_V2_GSI_QP;
}
init_attr->create_flags &= ~MLX4_IB_QP_CREATE_ROCE_V2_GSI;
}
}
- return ibqp;
+ return &qp->ibqp;
}
static int _mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
@@ -1700,10 +1640,8 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, udata);
}
- if (is_sqp(dev, mqp))
- kfree(to_msqp(mqp));
- else
- kfree(mqp);
+ kfree(mqp->sqp);
+ kfree(mqp);
return 0;
}
@@ -1713,7 +1651,7 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
struct mlx4_ib_qp *mqp = to_mqp(qp);
if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
- struct mlx4_ib_sqp *sqp = to_msqp(mqp);
+ struct mlx4_ib_sqp *sqp = mqp->sqp;
if (sqp->roce_v2_gsi)
ib_destroy_qp(sqp->roce_v2_gsi);
@@ -2575,7 +2513,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
qp->alt_port = attr->alt_port_num;
if (is_sqp(dev, qp))
- store_sqp_attrs(to_msqp(qp), attr, attr_mask);
+ store_sqp_attrs(qp->sqp, attr, attr_mask);
/*
* If we moved QP0 to RTR, bring the IB link up; if we moved
@@ -2852,7 +2790,7 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata);
if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
- struct mlx4_ib_sqp *sqp = to_msqp(mqp);
+ struct mlx4_ib_sqp *sqp = mqp->sqp;
int err = 0;
if (sqp->roce_v2_gsi)
@@ -2877,12 +2815,13 @@ static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
return -EINVAL;
}
-static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
+static int build_sriov_qp0_header(struct mlx4_ib_qp *qp,
const struct ib_ud_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
- struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device);
- struct ib_device *ib_dev = &mdev->ib_dev;
+ struct mlx4_ib_dev *mdev = to_mdev(qp->ibqp.device);
+ struct mlx4_ib_sqp *sqp = qp->sqp;
+ struct ib_device *ib_dev = qp->ibqp.device;
struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
struct mlx4_ib_ah *ah = to_mah(wr->ah);
@@ -2904,12 +2843,12 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
/* for proxy-qp0 sends, need to add in size of tunnel header */
/* for tunnel-qp0 sends, tunnel header is already in s/g list */
- if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
send_size += sizeof (struct mlx4_ib_tunnel_header);
ib_ud_header_init(send_size, 1, 0, 0, 0, 0, 0, 0, &sqp->ud_header);
- if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
sqp->ud_header.lrh.service_level =
be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
sqp->ud_header.lrh.destination_lid =
@@ -2926,26 +2865,26 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
sqp->ud_header.lrh.virtual_lane = 0;
sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
- err = ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
+ err = ib_get_cached_pkey(ib_dev, qp->port, 0, &pkey);
if (err)
return err;
sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
- if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
else
sqp->ud_header.bth.destination_qpn =
- cpu_to_be32(mdev->dev->caps.spec_qps[sqp->qp.port - 1].qp0_tunnel);
+ cpu_to_be32(mdev->dev->caps.spec_qps[qp->port - 1].qp0_tunnel);
sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
if (mlx4_is_master(mdev->dev)) {
- if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+ if (mlx4_get_parav_qkey(mdev->dev, qp->mqp.qpn, &qkey))
return -EINVAL;
} else {
- if (vf_get_qp0_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+ if (vf_get_qp0_qkey(mdev->dev, qp->mqp.qpn, &qkey))
return -EINVAL;
}
sqp->ud_header.deth.qkey = cpu_to_be32(qkey);
- sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn);
+ sqp->ud_header.deth.source_qpn = cpu_to_be32(qp->mqp.qpn);
sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
sqp->ud_header.immediate_present = 0;
@@ -3029,10 +2968,11 @@ static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8 port_num,
}
#define MLX4_ROCEV2_QP1_SPORT 0xC000
-static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
+static int build_mlx_header(struct mlx4_ib_qp *qp, const struct ib_ud_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
- struct ib_device *ib_dev = sqp->qp.ibqp.device;
+ struct mlx4_ib_sqp *sqp = qp->sqp;
+ struct ib_device *ib_dev = qp->ibqp.device;
struct mlx4_ib_dev *ibdev = to_mdev(ib_dev);
struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_ctrl_seg *ctrl = wqe;
@@ -3056,7 +2996,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
for (i = 0; i < wr->wr.num_sge; ++i)
send_size += wr->wr.sg_list[i].length;
- is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
+ is_eth = rdma_port_get_link_layer(qp->ibqp.device, qp->port) == IB_LINK_LAYER_ETHERNET;
is_grh = mlx4_ib_ah_grh_present(ah);
if (is_eth) {
enum ib_gid_type gid_type;
@@ -3070,9 +3010,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
if (err)
return err;
} else {
- err = fill_gid_by_hw_index(ibdev, sqp->qp.port,
- ah->av.ib.gid_index,
- &sgid, &gid_type);
+ err = fill_gid_by_hw_index(ibdev, qp->port,
+ ah->av.ib.gid_index, &sgid,
+ &gid_type);
if (!err) {
is_udp = gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
if (is_udp) {
@@ -3117,13 +3057,18 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
* indexes don't necessarily match the hw ones, so
* we must use our own cache
*/
- sqp->ud_header.grh.source_gid.global.subnet_prefix =
- cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov.
- demux[sqp->qp.port - 1].
- subnet_prefix)));
- sqp->ud_header.grh.source_gid.global.interface_id =
- to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
- guid_cache[ah->av.ib.gid_index];
+ sqp->ud_header.grh.source_gid.global
+ .subnet_prefix =
+ cpu_to_be64(atomic64_read(
+ &(to_mdev(ib_dev)
+ ->sriov
+ .demux[qp->port - 1]
+ .subnet_prefix)));
+ sqp->ud_header.grh.source_gid.global
+ .interface_id =
+ to_mdev(ib_dev)
+ ->sriov.demux[qp->port - 1]
+ .guid_cache[ah->av.ib.gid_index];
} else {
sqp->ud_header.grh.source_gid =
ah->ibah.sgid_attr->gid;
@@ -3155,10 +3100,13 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
if (!is_eth) {
- mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
- (sqp->ud_header.lrh.destination_lid ==
- IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
- (sqp->ud_header.lrh.service_level << 8));
+ mlx->flags |=
+ cpu_to_be32((!qp->ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
+ (sqp->ud_header.lrh.destination_lid ==
+ IB_LID_PERMISSIVE ?
+ MLX4_WQE_MLX_SLR :
+ 0) |
+ (sqp->ud_header.lrh.service_level << 8));
if (ah->av.ib.port_pd & cpu_to_be32(0x80000000))
mlx->flags |= cpu_to_be32(0x1); /* force loopback */
mlx->rlid = sqp->ud_header.lrh.destination_lid;
@@ -3204,21 +3152,23 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
}
} else {
- sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 :
- sl_to_vl(to_mdev(ib_dev),
- sqp->ud_header.lrh.service_level,
- sqp->qp.port);
- if (sqp->qp.ibqp.qp_num && sqp->ud_header.lrh.virtual_lane == 15)
+ sqp->ud_header.lrh.virtual_lane =
+ !qp->ibqp.qp_num ?
+ 15 :
+ sl_to_vl(to_mdev(ib_dev),
+ sqp->ud_header.lrh.service_level,
+ qp->port);
+ if (qp->ibqp.qp_num && sqp->ud_header.lrh.virtual_lane == 15)
return -EINVAL;
if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
}
sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
- if (!sqp->qp.ibqp.qp_num)
- err = ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index,
+ if (!qp->ibqp.qp_num)
+ err = ib_get_cached_pkey(ib_dev, qp->port, sqp->pkey_index,
&pkey);
else
- err = ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index,
+ err = ib_get_cached_pkey(ib_dev, qp->port, wr->pkey_index,
&pkey);
if (err)
return err;
@@ -3228,7 +3178,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ?
sqp->qkey : wr->remote_qkey);
- sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
+ sqp->ud_header.deth.source_qpn = cpu_to_be32(qp->ibqp.qp_num);
header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
@@ -3551,14 +3501,14 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
- struct mlx4_ib_sqp *sqp = to_msqp(qp);
+ struct mlx4_ib_sqp *sqp = qp->sqp;
if (sqp->roce_v2_gsi) {
struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah);
enum ib_gid_type gid_type;
union ib_gid gid;
- if (!fill_gid_by_hw_index(mdev, sqp->qp.port,
+ if (!fill_gid_by_hw_index(mdev, qp->port,
ah->av.ib.gid_index,
&gid, &gid_type))
qp = (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
@@ -3678,8 +3628,8 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
break;
case MLX4_IB_QPT_TUN_SMI_OWNER:
- err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr),
- ctrl, &seglen);
+ err = build_sriov_qp0_header(qp, ud_wr(wr), ctrl,
+ &seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
@@ -3715,8 +3665,8 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
break;
case MLX4_IB_QPT_PROXY_SMI_OWNER:
- err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr),
- ctrl, &seglen);
+ err = build_sriov_qp0_header(qp, ud_wr(wr), ctrl,
+ &seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
@@ -3749,8 +3699,7 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
case MLX4_IB_QPT_SMI:
case MLX4_IB_QPT_GSI:
- err = build_mlx_header(to_msqp(qp), ud_wr(wr), ctrl,
- &seglen);
+ err = build_mlx_header(qp, ud_wr(wr), ctrl, &seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
@@ -4172,6 +4121,7 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
if (!qp)
return ERR_PTR(-ENOMEM);
+ mutex_init(&qp->mutex);
qp->pri.vid = 0xFFFF;
qp->alt.vid = 0xFFFF;
@@ -4327,7 +4277,7 @@ int mlx4_ib_modify_wq(struct ib_wq *ibwq, struct ib_wq_attr *wq_attr,
return err;
}
-void mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata)
+int mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(ibwq->device);
struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq);
@@ -4338,36 +4288,35 @@ void mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata)
destroy_qp_common(dev, qp, MLX4_IB_RWQ_SRC, udata);
kfree(qp);
+ return 0;
}
-struct ib_rwq_ind_table
-*mlx4_ib_create_rwq_ind_table(struct ib_device *device,
- struct ib_rwq_ind_table_init_attr *init_attr,
- struct ib_udata *udata)
+int mlx4_ib_create_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata)
{
- struct ib_rwq_ind_table *rwq_ind_table;
struct mlx4_ib_create_rwq_ind_tbl_resp resp = {};
unsigned int ind_tbl_size = 1 << init_attr->log_ind_tbl_size;
+ struct ib_device *device = rwq_ind_table->device;
unsigned int base_wqn;
size_t min_resp_len;
- int i;
- int err;
+ int i, err = 0;
if (udata->inlen > 0 &&
!ib_is_udata_cleared(udata, 0,
udata->inlen))
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
if (udata->outlen && udata->outlen < min_resp_len)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (ind_tbl_size >
device->attrs.rss_caps.max_rwq_indirection_table_size) {
pr_debug("log_ind_tbl_size = %d is bigger than supported = %d\n",
ind_tbl_size,
device->attrs.rss_caps.max_rwq_indirection_table_size);
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
base_wqn = init_attr->ind_tbl[0]->wq_num;
@@ -4375,39 +4324,23 @@ struct ib_rwq_ind_table
if (base_wqn % ind_tbl_size) {
pr_debug("WQN=0x%x isn't aligned with indirection table size\n",
base_wqn);
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
for (i = 1; i < ind_tbl_size; i++) {
if (++base_wqn != init_attr->ind_tbl[i]->wq_num) {
pr_debug("indirection table's WQNs aren't consecutive\n");
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
}
- rwq_ind_table = kzalloc(sizeof(*rwq_ind_table), GFP_KERNEL);
- if (!rwq_ind_table)
- return ERR_PTR(-ENOMEM);
-
if (udata->outlen) {
resp.response_length = offsetof(typeof(resp), response_length) +
sizeof(resp.response_length);
err = ib_copy_to_udata(udata, &resp, resp.response_length);
- if (err)
- goto err;
}
- return rwq_ind_table;
-
-err:
- kfree(rwq_ind_table);
- return ERR_PTR(err);
-}
-
-int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
-{
- kfree(ib_rwq_ind_tbl);
- return 0;
+ return err;
}
struct mlx4_ib_drain_cqe {
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 8f9d5035142d..bf618529e734 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -115,8 +115,9 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq,
if (IS_ERR(srq->umem))
return PTR_ERR(srq->umem);
- err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem),
- PAGE_SHIFT, &srq->mtt);
+ err = mlx4_mtt_init(
+ dev->dev, ib_umem_num_dma_blocks(srq->umem, PAGE_SIZE),
+ PAGE_SHIFT, &srq->mtt);
if (err)
goto err_buf;
@@ -260,7 +261,7 @@ int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
return 0;
}
-void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
+int mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(srq->device);
struct mlx4_ib_srq *msrq = to_msrq(srq);
@@ -282,6 +283,7 @@ void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
mlx4_db_free(dev->dev, &msrq->db);
}
ib_umem_release(msrq->umem);
+ return 0;
}
void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index)
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index 59e5ec39b447..505bc47fd575 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -106,8 +106,8 @@ int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
if (ah_type == RDMA_AH_ATTR_TYPE_ROCE && udata) {
int err;
struct mlx5_ib_create_ah_resp resp = {};
- u32 min_resp_len = offsetof(typeof(resp), dmac) +
- sizeof(resp.dmac);
+ u32 min_resp_len =
+ offsetofend(struct mlx5_ib_create_ah_resp, dmac);
if (udata->outlen < min_resp_len)
return -EINVAL;
@@ -147,8 +147,3 @@ int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
return 0;
}
-
-void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags)
-{
- return;
-}
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index ebb2f108b64f..234f29912ba9 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -168,14 +168,14 @@ void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid)
mlx5_cmd_exec_in(dev, destroy_tis, in);
}
-void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid)
+int mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid)
{
u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {};
MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
MLX5_SET(destroy_rqt_in, in, rqtn, rqtn);
MLX5_SET(destroy_rqt_in, in, uid, uid);
- mlx5_cmd_exec_in(dev, destroy_rqt, in);
+ return mlx5_cmd_exec_in(dev, destroy_rqt, in);
}
int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn,
@@ -209,14 +209,14 @@ void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn,
mlx5_cmd_exec_in(dev, dealloc_transport_domain, in);
}
-void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid)
+int mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid)
{
u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {};
MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD);
MLX5_SET(dealloc_pd_in, in, pd, pdn);
MLX5_SET(dealloc_pd_in, in, uid, uid);
- mlx5_cmd_exec_in(dev, dealloc_pd, in);
+ return mlx5_cmd_exec_in(dev, dealloc_pd, in);
}
int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 1d192a8ca87d..88ea6ef8f2cb 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -44,10 +44,10 @@ int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
u64 length, u32 alignment);
void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length);
-void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid);
+int mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid);
void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid);
void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid);
-void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid);
+int mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid);
int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn,
u16 uid);
void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn,
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
index 145f3cb40ccb..70c8fd67ee2f 100644
--- a/drivers/infiniband/hw/mlx5/counters.c
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -117,7 +117,7 @@ err_bound:
return ret;
}
-static void mlx5_ib_destroy_counters(struct ib_counters *counters)
+static int mlx5_ib_destroy_counters(struct ib_counters *counters)
{
struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
@@ -125,6 +125,7 @@ static void mlx5_ib_destroy_counters(struct ib_counters *counters)
if (mcounters->hw_cntrs_hndl)
mlx5_fc_destroy(to_mdev(counters->device)->mdev,
mcounters->hw_cntrs_hndl);
+ return 0;
}
static int mlx5_ib_create_counters(struct ib_counters *counters,
@@ -456,12 +457,12 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
}
- cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL);
+ cnts->names = kcalloc(num_counters, sizeof(*cnts->names), GFP_KERNEL);
if (!cnts->names)
return -ENOMEM;
cnts->offsets = kcalloc(num_counters,
- sizeof(cnts->offsets), GFP_KERNEL);
+ sizeof(*cnts->offsets), GFP_KERNEL);
if (!cnts->offsets)
goto err_names;
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index dceb0eb2bed1..fb62f1d04afa 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -168,7 +168,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
{
enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1);
struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
- struct mlx5_ib_srq *srq;
+ struct mlx5_ib_srq *srq = NULL;
struct mlx5_ib_wq *wq;
u16 wqe_ctr;
u8 roce_packet_type;
@@ -180,7 +180,8 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
if (qp->ibqp.xrcd) {
msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn));
- srq = to_mibsrq(msrq);
+ if (msrq)
+ srq = to_mibsrq(msrq);
} else {
srq = to_msrq(qp->ibqp.srq);
}
@@ -254,7 +255,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
switch (roce_packet_type) {
case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH:
- wc->network_hdr_type = RDMA_NETWORK_IB;
+ wc->network_hdr_type = RDMA_NETWORK_ROCE_V1;
break;
case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6:
wc->network_hdr_type = RDMA_NETWORK_IPV6;
@@ -1023,16 +1024,21 @@ err_cqb:
return err;
}
-void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(cq->device);
struct mlx5_ib_cq *mcq = to_mcq(cq);
+ int ret;
+
+ ret = mlx5_core_destroy_cq(dev->mdev, &mcq->mcq);
+ if (ret)
+ return ret;
- mlx5_core_destroy_cq(dev->mdev, &mcq->mcq);
if (udata)
destroy_cq_user(mcq, udata);
else
destroy_cq_kernel(dev, mcq);
+ return 0;
}
static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index e9cfb9a2ef41..492cfe063bca 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -136,12 +136,9 @@ static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
#define LAST_COUNTERS_FIELD counters
/* Field is the last supported field */
-#define FIELDS_NOT_SUPPORTED(filter, field)\
- memchr_inv((void *)&filter.field +\
- sizeof(filter.field), 0,\
- sizeof(filter) -\
- offsetof(typeof(filter), field) -\
- sizeof(filter.field))
+#define FIELDS_NOT_SUPPORTED(filter, field) \
+ memchr_inv((void *)&filter.field + sizeof(filter.field), 0, \
+ sizeof(filter) - offsetofend(typeof(filter), field))
int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
bool is_egress,
@@ -767,6 +764,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
{
bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
struct mlx5_flow_namespace *ns = NULL;
+ enum mlx5_flow_namespace_type fn_type;
struct mlx5_ib_flow_prio *prio;
struct mlx5_flow_table *ft;
int max_table_size;
@@ -780,11 +778,9 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
log_max_ft_size));
esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
DEVLINK_ESWITCH_ENCAP_MODE_NONE;
- if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- enum mlx5_flow_namespace_type fn_type;
-
- if (flow_is_multicast_only(flow_attr) &&
- !dont_trap)
+ switch (flow_attr->type) {
+ case IB_FLOW_ATTR_NORMAL:
+ if (flow_is_multicast_only(flow_attr) && !dont_trap)
priority = MLX5_IB_FLOW_MCAST_PRIO;
else
priority = ib_prio_to_core_prio(flow_attr->priority,
@@ -797,12 +793,11 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
if (!dev->is_rep && !esw_encap &&
MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- reformat_l3_tunnel_to_l2))
+ reformat_l3_tunnel_to_l2))
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
} else {
- max_table_size =
- BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
- log_max_ft_size));
+ max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX(
+ dev->mdev, log_max_ft_size));
fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
prio = &dev->flow_db->egress_prios[priority];
if (!dev->is_rep && !esw_encap &&
@@ -812,27 +807,31 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
num_entries = MLX5_FS_MAX_ENTRIES;
num_groups = MLX5_FS_MAX_TYPES;
- } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
+ break;
+ case IB_FLOW_ATTR_ALL_DEFAULT:
+ case IB_FLOW_ATTR_MC_DEFAULT:
ns = mlx5_get_flow_namespace(dev->mdev,
MLX5_FLOW_NAMESPACE_LEFTOVERS);
- build_leftovers_ft_param(&priority,
- &num_entries,
- &num_groups);
+ build_leftovers_ft_param(&priority, &num_entries, &num_groups);
prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
- } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
+ break;
+ case IB_FLOW_ATTR_SNIFFER:
if (!MLX5_CAP_FLOWTABLE(dev->mdev,
allow_sniffer_and_nic_rx_shared_tir))
return ERR_PTR(-EOPNOTSUPP);
- ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ?
- MLX5_FLOW_NAMESPACE_SNIFFER_RX :
- MLX5_FLOW_NAMESPACE_SNIFFER_TX);
+ ns = mlx5_get_flow_namespace(
+ dev->mdev, ft_type == MLX5_IB_FT_RX ?
+ MLX5_FLOW_NAMESPACE_SNIFFER_RX :
+ MLX5_FLOW_NAMESPACE_SNIFFER_TX);
prio = &dev->flow_db->sniffer[ft_type];
priority = 0;
num_entries = 1;
num_groups = 1;
+ break;
+ default:
+ break;
}
if (!ns)
@@ -954,7 +953,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
if (!flow_is_multicast_only(flow_attr))
set_underlay_qp(dev, spec, underlay_qpn);
- if (dev->is_rep) {
+ if (dev->is_rep && flow_attr->type != IB_FLOW_ATTR_SNIFFER) {
struct mlx5_eswitch_rep *rep;
rep = dev->port[flow_attr->port - 1].rep;
@@ -1116,6 +1115,7 @@ static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
int err;
static const struct ib_flow_attr flow_attr = {
.num_of_specs = 0,
+ .type = IB_FLOW_ATTR_SNIFFER,
.size = sizeof(flow_attr)
};
@@ -1143,10 +1143,8 @@ err:
return ERR_PTR(err);
}
-
static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
- int domain,
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
@@ -1162,8 +1160,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
int underlay_qpn;
if (udata && udata->inlen) {
- min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) +
- sizeof(ucmd_hdr.reserved);
+ min_ucmd_sz = offsetofend(struct mlx5_ib_create_flow, reserved);
if (udata->inlen < min_ucmd_sz)
return ERR_PTR(-EOPNOTSUPP);
@@ -1197,10 +1194,9 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
goto free_ucmd;
}
- if (domain != IB_FLOW_DOMAIN_USER ||
- flow_attr->port > dev->num_ports ||
- (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP |
- IB_FLOW_ATTR_FLAGS_EGRESS))) {
+ if (flow_attr->port > dev->num_ports ||
+ (flow_attr->flags &
+ ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | IB_FLOW_ATTR_FLAGS_EGRESS))) {
err = -EINVAL;
goto free_ucmd;
}
@@ -1245,19 +1241,22 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
dst->tir_num = mqp->raw_packet_qp.rq.tirn;
}
- if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
+ switch (flow_attr->type) {
+ case IB_FLOW_ATTR_NORMAL:
underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ?
mqp->underlay_qpn :
0;
handler = _create_flow_rule(dev, ft_prio, flow_attr, dst,
underlay_qpn, ucmd);
- } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
- handler = create_leftovers_rule(dev, ft_prio, flow_attr,
- dst);
- } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
+ break;
+ case IB_FLOW_ATTR_ALL_DEFAULT:
+ case IB_FLOW_ATTR_MC_DEFAULT:
+ handler = create_leftovers_rule(dev, ft_prio, flow_attr, dst);
+ break;
+ case IB_FLOW_ATTR_SNIFFER:
handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
- } else {
+ break;
+ default:
err = -EINVAL;
goto destroy_ft;
}
@@ -1305,39 +1304,47 @@ _get_flow_table(struct mlx5_ib_dev *dev,
esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
DEVLINK_ESWITCH_ENCAP_MODE_NONE;
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
- max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- log_max_ft_size));
+ switch (fs_matcher->ns_type) {
+ case MLX5_FLOW_NAMESPACE_BYPASS:
+ max_table_size = BIT(
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size));
if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
reformat_l3_tunnel_to_l2) &&
!esw_encap)
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) {
+ break;
+ case MLX5_FLOW_NAMESPACE_EGRESS:
max_table_size = BIT(
MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
- if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap)
+ if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) &&
+ !esw_encap)
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) {
+ break;
+ case MLX5_FLOW_NAMESPACE_FDB:
max_table_size = BIT(
MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
- if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) &&
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev,
+ reformat_l3_tunnel_to_l2) &&
esw_encap)
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
priority = FDB_BYPASS_PATH;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) {
- max_table_size =
- BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
- log_max_ft_size));
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_RX:
+ max_table_size = BIT(
+ MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, log_max_ft_size));
priority = fs_matcher->priority;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) {
- max_table_size =
- BIT(MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
- log_max_ft_size));
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TX:
+ max_table_size = BIT(
+ MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size));
priority = fs_matcher->priority;
+ break;
+ default:
+ break;
}
max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
@@ -1346,16 +1353,24 @@ _get_flow_table(struct mlx5_ib_dev *dev,
if (!ns)
return ERR_PTR(-EOPNOTSUPP);
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS)
+ switch (fs_matcher->ns_type) {
+ case MLX5_FLOW_NAMESPACE_BYPASS:
prio = &dev->flow_db->prios[priority];
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS)
+ break;
+ case MLX5_FLOW_NAMESPACE_EGRESS:
prio = &dev->flow_db->egress_prios[priority];
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ break;
+ case MLX5_FLOW_NAMESPACE_FDB:
prio = &dev->flow_db->fdb;
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX)
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_RX:
prio = &dev->flow_db->rdma_rx[priority];
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX)
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TX:
prio = &dev->flow_db->rdma_tx[priority];
+ break;
+ default: return ERR_PTR(-EINVAL);
+ }
if (!prio)
return ERR_PTR(-EINVAL);
@@ -1488,20 +1503,25 @@ static struct mlx5_ib_flow_handler *raw_fs_rule_add(
goto unlock;
}
- if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
+ switch (dest_type) {
+ case MLX5_FLOW_DESTINATION_TYPE_TIR:
dst[dst_num].type = dest_type;
dst[dst_num++].tir_num = dest_id;
flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
dst[dst_num++].ft_num = dest_id;
flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_PORT) {
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_PORT:
dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ break;
+ default:
+ break;
}
-
if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
dst[dst_num].counter_id = counter_id;
diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c
index 40d418153891..7fcad9135276 100644
--- a/drivers/infiniband/hw/mlx5/gsi.c
+++ b/drivers/infiniband/hw/mlx5/gsi.c
@@ -35,44 +35,19 @@
struct mlx5_ib_gsi_wr {
struct ib_cqe cqe;
struct ib_wc wc;
- int send_flags;
bool completed:1;
};
-struct mlx5_ib_gsi_qp {
- struct ib_qp ibqp;
- struct ib_qp *rx_qp;
- u8 port_num;
- struct ib_qp_cap cap;
- enum ib_sig_type sq_sig_type;
- /* Serialize qp state modifications */
- struct mutex mutex;
- struct ib_cq *cq;
- struct mlx5_ib_gsi_wr *outstanding_wrs;
- u32 outstanding_pi, outstanding_ci;
- int num_qps;
- /* Protects access to the tx_qps. Post send operations synchronize
- * with tx_qp creation in setup_qp(). Also protects the
- * outstanding_wrs array and indices.
- */
- spinlock_t lock;
- struct ib_qp **tx_qps;
-};
-
-static struct mlx5_ib_gsi_qp *gsi_qp(struct ib_qp *qp)
-{
- return container_of(qp, struct mlx5_ib_gsi_qp, ibqp);
-}
-
static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev)
{
return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn);
}
/* Call with gsi->lock locked */
-static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
+static void generate_completions(struct mlx5_ib_qp *mqp)
{
- struct ib_cq *gsi_cq = gsi->ibqp.send_cq;
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
+ struct ib_cq *gsi_cq = mqp->ibqp.send_cq;
struct mlx5_ib_gsi_wr *wr;
u32 index;
@@ -83,10 +58,7 @@ static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
if (!wr->completed)
break;
- if (gsi->sq_sig_type == IB_SIGNAL_ALL_WR ||
- wr->send_flags & IB_SEND_SIGNALED)
- WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc));
-
+ WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc));
wr->completed = false;
}
@@ -98,6 +70,7 @@ static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc)
struct mlx5_ib_gsi_qp *gsi = cq->cq_context;
struct mlx5_ib_gsi_wr *wr =
container_of(wc->wr_cqe, struct mlx5_ib_gsi_wr, cqe);
+ struct mlx5_ib_qp *mqp = container_of(gsi, struct mlx5_ib_qp, gsi);
u64 wr_id;
unsigned long flags;
@@ -106,19 +79,19 @@ static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc)
wr_id = wr->wc.wr_id;
wr->wc = *wc;
wr->wc.wr_id = wr_id;
- wr->wc.qp = &gsi->ibqp;
+ wr->wc.qp = &mqp->ibqp;
- generate_completions(gsi);
+ generate_completions(mqp);
spin_unlock_irqrestore(&gsi->lock, flags);
}
-struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr)
+int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp,
+ struct ib_qp_init_attr *attr)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_gsi_qp *gsi;
- struct ib_qp_init_attr hw_init_attr = *init_attr;
- const u8 port_num = init_attr->port_num;
+ struct ib_qp_init_attr hw_init_attr = *attr;
+ const u8 port_num = attr->port_num;
int num_qps = 0;
int ret;
@@ -130,26 +103,19 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
num_qps = MLX5_MAX_PORTS;
}
- gsi = kzalloc(sizeof(*gsi), GFP_KERNEL);
- if (!gsi)
- return ERR_PTR(-ENOMEM);
-
+ gsi = &mqp->gsi;
gsi->tx_qps = kcalloc(num_qps, sizeof(*gsi->tx_qps), GFP_KERNEL);
- if (!gsi->tx_qps) {
- ret = -ENOMEM;
- goto err_free;
- }
+ if (!gsi->tx_qps)
+ return -ENOMEM;
- gsi->outstanding_wrs = kcalloc(init_attr->cap.max_send_wr,
- sizeof(*gsi->outstanding_wrs),
- GFP_KERNEL);
+ gsi->outstanding_wrs =
+ kcalloc(attr->cap.max_send_wr, sizeof(*gsi->outstanding_wrs),
+ GFP_KERNEL);
if (!gsi->outstanding_wrs) {
ret = -ENOMEM;
goto err_free_tx;
}
- mutex_init(&gsi->mutex);
-
mutex_lock(&dev->devr.mutex);
if (dev->devr.ports[port_num - 1].gsi) {
@@ -161,12 +127,10 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
gsi->num_qps = num_qps;
spin_lock_init(&gsi->lock);
- gsi->cap = init_attr->cap;
- gsi->sq_sig_type = init_attr->sq_sig_type;
- gsi->ibqp.qp_num = 1;
+ gsi->cap = attr->cap;
gsi->port_num = port_num;
- gsi->cq = ib_alloc_cq(pd->device, gsi, init_attr->cap.max_send_wr, 0,
+ gsi->cq = ib_alloc_cq(pd->device, gsi, attr->cap.max_send_wr, 0,
IB_POLL_SOFTIRQ);
if (IS_ERR(gsi->cq)) {
mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. error %ld\n",
@@ -182,19 +146,31 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
hw_init_attr.cap.max_send_sge = 0;
hw_init_attr.cap.max_inline_data = 0;
}
- gsi->rx_qp = ib_create_qp(pd, &hw_init_attr);
+
+ gsi->rx_qp = mlx5_ib_create_qp(pd, &hw_init_attr, NULL);
if (IS_ERR(gsi->rx_qp)) {
mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n",
PTR_ERR(gsi->rx_qp));
ret = PTR_ERR(gsi->rx_qp);
goto err_destroy_cq;
}
+ gsi->rx_qp->device = pd->device;
+ gsi->rx_qp->pd = pd;
+ gsi->rx_qp->real_qp = gsi->rx_qp;
+
+ gsi->rx_qp->qp_type = hw_init_attr.qp_type;
+ gsi->rx_qp->send_cq = hw_init_attr.send_cq;
+ gsi->rx_qp->recv_cq = hw_init_attr.recv_cq;
+ gsi->rx_qp->event_handler = hw_init_attr.event_handler;
+ spin_lock_init(&gsi->rx_qp->mr_lock);
+ INIT_LIST_HEAD(&gsi->rx_qp->rdma_mrs);
+ INIT_LIST_HEAD(&gsi->rx_qp->sig_mrs);
- dev->devr.ports[init_attr->port_num - 1].gsi = gsi;
+ dev->devr.ports[attr->port_num - 1].gsi = gsi;
mutex_unlock(&dev->devr.mutex);
- return &gsi->ibqp;
+ return 0;
err_destroy_cq:
ib_free_cq(gsi->cq);
@@ -203,23 +179,19 @@ err_free_wrs:
kfree(gsi->outstanding_wrs);
err_free_tx:
kfree(gsi->tx_qps);
-err_free:
- kfree(gsi);
- return ERR_PTR(ret);
+ return ret;
}
-int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp)
+int mlx5_ib_destroy_gsi(struct mlx5_ib_qp *mqp)
{
- struct mlx5_ib_dev *dev = to_mdev(qp->device);
- struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+ struct mlx5_ib_dev *dev = to_mdev(mqp->ibqp.device);
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
const int port_num = gsi->port_num;
int qp_index;
int ret;
- mlx5_ib_dbg(dev, "destroying GSI QP\n");
-
mutex_lock(&dev->devr.mutex);
- ret = ib_destroy_qp(gsi->rx_qp);
+ ret = mlx5_ib_destroy_qp(gsi->rx_qp, NULL);
if (ret) {
mlx5_ib_warn(dev, "unable to destroy hardware GSI QP. error %d\n",
ret);
@@ -241,7 +213,7 @@ int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp)
kfree(gsi->outstanding_wrs);
kfree(gsi->tx_qps);
- kfree(gsi);
+ kfree(mqp);
return 0;
}
@@ -259,7 +231,6 @@ static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi)
.max_send_sge = gsi->cap.max_send_sge,
.max_inline_data = gsi->cap.max_inline_data,
},
- .sq_sig_type = gsi->sq_sig_type,
.qp_type = IB_QPT_UD,
.create_flags = MLX5_IB_QP_CREATE_SQPN_QP1,
};
@@ -370,56 +341,54 @@ err_destroy_qp:
static void setup_qps(struct mlx5_ib_gsi_qp *gsi)
{
+ struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
u16 qp_index;
+ mutex_lock(&dev->devr.mutex);
for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index)
setup_qp(gsi, qp_index);
+ mutex_unlock(&dev->devr.mutex);
}
int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
int attr_mask)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
- struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
int ret;
mlx5_ib_dbg(dev, "modifying GSI QP to state %d\n", attr->qp_state);
- mutex_lock(&gsi->mutex);
ret = ib_modify_qp(gsi->rx_qp, attr, attr_mask);
if (ret) {
mlx5_ib_warn(dev, "unable to modify GSI rx QP: %d\n", ret);
- goto unlock;
+ return ret;
}
if (to_mqp(gsi->rx_qp)->state == IB_QPS_RTS)
setup_qps(gsi);
-
-unlock:
- mutex_unlock(&gsi->mutex);
-
- return ret;
+ return 0;
}
int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr)
{
- struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
int ret;
- mutex_lock(&gsi->mutex);
ret = ib_query_qp(gsi->rx_qp, qp_attr, qp_attr_mask, qp_init_attr);
qp_init_attr->cap = gsi->cap;
- mutex_unlock(&gsi->mutex);
-
return ret;
}
/* Call with gsi->lock locked */
-static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi,
+static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_qp *mqp,
struct ib_ud_wr *wr, struct ib_wc *wc)
{
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
struct mlx5_ib_gsi_wr *gsi_wr;
@@ -448,22 +417,21 @@ static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi,
}
/* Call with gsi->lock locked */
-static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi,
- struct ib_ud_wr *wr)
+static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_qp *mqp, struct ib_ud_wr *wr)
{
struct ib_wc wc = {
{ .wr_id = wr->wr.wr_id },
.status = IB_WC_SUCCESS,
.opcode = IB_WC_SEND,
- .qp = &gsi->ibqp,
+ .qp = &mqp->ibqp,
};
int ret;
- ret = mlx5_ib_add_outstanding_wr(gsi, wr, &wc);
+ ret = mlx5_ib_add_outstanding_wr(mqp, wr, &wc);
if (ret)
return ret;
- generate_completions(gsi);
+ generate_completions(mqp);
return 0;
}
@@ -490,7 +458,8 @@ static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr)
int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr)
{
- struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
struct ib_qp *tx_qp;
unsigned long flags;
int ret;
@@ -503,14 +472,14 @@ int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr,
spin_lock_irqsave(&gsi->lock, flags);
tx_qp = get_tx_qp(gsi, &cur_wr);
if (!tx_qp) {
- ret = mlx5_ib_gsi_silent_drop(gsi, &cur_wr);
+ ret = mlx5_ib_gsi_silent_drop(mqp, &cur_wr);
if (ret)
goto err;
spin_unlock_irqrestore(&gsi->lock, flags);
continue;
}
- ret = mlx5_ib_add_outstanding_wr(gsi, &cur_wr, NULL);
+ ret = mlx5_ib_add_outstanding_wr(mqp, &cur_wr, NULL);
if (ret)
goto err;
@@ -534,7 +503,8 @@ err:
int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr)
{
- struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
return ib_post_recv(gsi->rx_qp, wr, bad_wr);
}
@@ -544,7 +514,5 @@ void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi)
if (!gsi)
return;
- mutex_lock(&gsi->mutex);
setup_qps(gsi);
- mutex_unlock(&gsi->mutex);
}
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index d60d63221b14..89e04ca62ae0 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -326,8 +326,8 @@ out:
spin_unlock(&port->mp.mpi_lock);
}
-static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, u8 *active_speed,
- u8 *active_width)
+static int translate_eth_legacy_proto_oper(u32 eth_proto_oper,
+ u16 *active_speed, u8 *active_width)
{
switch (eth_proto_oper) {
case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII):
@@ -384,7 +384,7 @@ static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, u8 *active_speed,
return 0;
}
-static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed,
+static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed,
u8 *active_width)
{
switch (eth_proto_oper) {
@@ -436,7 +436,7 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed,
return 0;
}
-static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed,
+static int translate_eth_proto_oper(u32 eth_proto_oper, u16 *active_speed,
u8 *active_width, bool ext)
{
return ext ?
@@ -546,7 +546,7 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
unsigned int index, const union ib_gid *gid,
const struct ib_gid_attr *attr)
{
- enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+ enum ib_gid_type gid_type = IB_GID_TYPE_ROCE;
u16 vlan_id = 0xffff;
u8 roce_version = 0;
u8 roce_l3_type = 0;
@@ -561,7 +561,7 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
}
switch (gid_type) {
- case IB_GID_TYPE_IB:
+ case IB_GID_TYPE_ROCE:
roce_version = MLX5_ROCE_VERSION_1;
break;
case IB_GID_TYPE_ROCE_UDP_ENCAP:
@@ -840,7 +840,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
/* We support 'Gappy' memory registration too */
props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG;
}
- props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+ /* IB_WR_REG_MR always requires changing the entity size with UMR */
+ if (!MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
+ props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
if (MLX5_CAP_GEN(mdev, sho)) {
props->device_cap_flags |= IB_DEVICE_INTEGRITY_HANDOVER;
/* At this stage no support for signature handover */
@@ -1175,32 +1177,24 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
return 0;
}
-enum mlx5_ib_width {
- MLX5_IB_WIDTH_1X = 1 << 0,
- MLX5_IB_WIDTH_2X = 1 << 1,
- MLX5_IB_WIDTH_4X = 1 << 2,
- MLX5_IB_WIDTH_8X = 1 << 3,
- MLX5_IB_WIDTH_12X = 1 << 4
-};
-
-static void translate_active_width(struct ib_device *ibdev, u8 active_width,
- u8 *ib_width)
+static void translate_active_width(struct ib_device *ibdev, u16 active_width,
+ u8 *ib_width)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- if (active_width & MLX5_IB_WIDTH_1X)
+ if (active_width & MLX5_PTYS_WIDTH_1X)
*ib_width = IB_WIDTH_1X;
- else if (active_width & MLX5_IB_WIDTH_2X)
+ else if (active_width & MLX5_PTYS_WIDTH_2X)
*ib_width = IB_WIDTH_2X;
- else if (active_width & MLX5_IB_WIDTH_4X)
+ else if (active_width & MLX5_PTYS_WIDTH_4X)
*ib_width = IB_WIDTH_4X;
- else if (active_width & MLX5_IB_WIDTH_8X)
+ else if (active_width & MLX5_PTYS_WIDTH_8X)
*ib_width = IB_WIDTH_8X;
- else if (active_width & MLX5_IB_WIDTH_12X)
+ else if (active_width & MLX5_PTYS_WIDTH_12X)
*ib_width = IB_WIDTH_12X;
else {
mlx5_ib_dbg(dev, "Invalid active_width %d, setting width to default value: 4x\n",
- (int)active_width);
+ active_width);
*ib_width = IB_WIDTH_4X;
}
@@ -1277,7 +1271,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
u16 max_mtu;
u16 oper_mtu;
int err;
- u8 ib_link_width_oper;
+ u16 ib_link_width_oper;
u8 vl_hw_cap;
rep = kzalloc(sizeof(*rep), GFP_KERNEL);
@@ -1310,16 +1304,13 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP)
props->port_cap_flags2 = rep->cap_mask2;
- err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
+ err = mlx5_query_ib_port_oper(mdev, &ib_link_width_oper,
+ &props->active_speed, port);
if (err)
goto out;
translate_active_width(ibdev, ib_link_width_oper, &props->active_width);
- err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port);
- if (err)
- goto out;
-
mlx5_query_port_max_mtu(mdev, &max_mtu, port);
props->max_mtu = mlx5_mtu_to_ib_mtu(max_mtu);
@@ -2354,7 +2345,9 @@ static inline int check_dm_type_support(struct mlx5_ib_dev *dev,
return -EPERM;
if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
- MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner)))
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner) ||
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2) ||
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner_v2)))
return -EOPNOTSUPP;
break;
}
@@ -2569,12 +2562,12 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
return 0;
}
-static void mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+static int mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
struct mlx5_ib_dev *mdev = to_mdev(pd->device);
struct mlx5_ib_pd *mpd = to_mpd(pd);
- mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid);
+ return mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid);
}
static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
@@ -2699,9 +2692,7 @@ static void pkey_change_handler(struct work_struct *work)
container_of(work, struct mlx5_ib_port_resources,
pkey_change_work);
- mutex_lock(&ports->devr->mutex);
mlx5_ib_gsi_pkey_change(ports->gsi);
- mutex_unlock(&ports->devr->mutex);
}
static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
@@ -3127,11 +3118,9 @@ static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
atomic_inc(&devr->p0->usecnt);
atomic_set(&devr->s1->usecnt, 0);
- for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) {
+ for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
INIT_WORK(&devr->ports[port].pkey_change_work,
pkey_change_handler);
- devr->ports[port].devr = devr;
- }
return 0;
@@ -4098,6 +4087,8 @@ static const struct ib_device_ops mlx5_ib_dev_sriov_ops = {
static const struct ib_device_ops mlx5_ib_dev_mw_ops = {
.alloc_mw = mlx5_ib_alloc_mw,
.dealloc_mw = mlx5_ib_dealloc_mw,
+
+ INIT_RDMA_OBJ_SIZE(ib_mw, mlx5_ib_mw, ibmw),
};
static const struct ib_device_ops mlx5_ib_dev_xrc_ops = {
@@ -4268,6 +4259,9 @@ static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = {
.destroy_wq = mlx5_ib_destroy_wq,
.get_netdev = mlx5_ib_get_netdev,
.modify_wq = mlx5_ib_modify_wq,
+
+ INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx5_ib_rwq_ind_table,
+ ib_rwq_ind_tbl),
};
static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
@@ -4386,7 +4380,7 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
name = "mlx5_%d";
else
name = "mlx5_bond_%d";
- return ib_register_device(&dev->ib_dev, name);
+ return ib_register_device(&dev->ib_dev, name, &dev->mdev->pdev->dev);
}
static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index c19ec9fd8a63..13de3d2edd34 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -169,8 +169,8 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
int page_shift, __be64 *pas, int access_flags)
{
return __mlx5_ib_populate_pas(dev, umem, page_shift, 0,
- ib_umem_num_pages(umem), pas,
- access_flags);
+ ib_umem_num_dma_blocks(umem, PAGE_SIZE),
+ pas, access_flags);
}
int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
{
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 5287fc868662..b1f2b34e5955 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -384,6 +384,22 @@ struct mlx5_ib_dct {
u32 *in;
};
+struct mlx5_ib_gsi_qp {
+ struct ib_qp *rx_qp;
+ u8 port_num;
+ struct ib_qp_cap cap;
+ struct ib_cq *cq;
+ struct mlx5_ib_gsi_wr *outstanding_wrs;
+ u32 outstanding_pi, outstanding_ci;
+ int num_qps;
+ /* Protects access to the tx_qps. Post send operations synchronize
+ * with tx_qp creation in setup_qp(). Also protects the
+ * outstanding_wrs array and indices.
+ */
+ spinlock_t lock;
+ struct ib_qp **tx_qps;
+};
+
struct mlx5_ib_qp {
struct ib_qp ibqp;
union {
@@ -391,6 +407,7 @@ struct mlx5_ib_qp {
struct mlx5_ib_raw_packet_qp raw_packet_qp;
struct mlx5_ib_rss_qp rss_qp;
struct mlx5_ib_dct dct;
+ struct mlx5_ib_gsi_qp gsi;
};
struct mlx5_frag_buf buf;
@@ -693,10 +710,7 @@ struct mlx5_mr_cache {
unsigned long last_add;
};
-struct mlx5_ib_gsi_qp;
-
struct mlx5_ib_port_resources {
- struct mlx5_ib_resources *devr;
struct mlx5_ib_gsi_qp *gsi;
struct work_struct pkey_change_work;
};
@@ -1119,13 +1133,16 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
int mlx5_ib_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
-void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags);
+static inline int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags)
+{
+ return 0;
+}
int mlx5_ib_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr,
struct ib_udata *udata);
int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr);
-void mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
+int mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr);
int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp);
@@ -1148,7 +1165,7 @@ int mlx5_ib_read_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer,
size_t buflen, size_t *bc);
int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
-void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
+int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
@@ -1163,8 +1180,7 @@ int mlx5_ib_advise_mr(struct ib_pd *pd,
struct ib_sge *sg_list,
u32 num_sge,
struct uverbs_attr_bundle *attrs);
-struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata);
+int mlx5_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
int mlx5_ib_dealloc_mw(struct ib_mw *mw);
int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags);
@@ -1193,7 +1209,7 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
const struct ib_mad *in, struct ib_mad *out,
size_t *out_mad_size, u16 *out_mad_pkey_index);
int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
-void mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
+int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset);
int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port);
int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
@@ -1229,7 +1245,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
- unsigned int entry);
+ unsigned int entry, int access_flags);
void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr);
@@ -1238,12 +1254,12 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata);
-void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
+int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
u32 wq_attr_mask, struct ib_udata *udata);
-struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
- struct ib_rwq_ind_table_init_attr *init_attr,
- struct ib_udata *udata);
+int mlx5_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata);
int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
struct ib_ucontext *context,
@@ -1267,6 +1283,7 @@ void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
enum ib_uverbs_advise_mr_advice advice,
u32 flags, struct ib_sge *sg_list, u32 num_sge);
+int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
{
@@ -1288,6 +1305,10 @@ mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
{
return -EOPNOTSUPP;
}
+static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable)
+{
+ return -EOPNOTSUPP;
+}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
extern const struct mmu_interval_notifier_ops mlx5_mn_ops;
@@ -1318,9 +1339,9 @@ void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
/* GSI QP helper functions */
-struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr);
-int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp);
+int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp,
+ struct ib_qp_init_attr *attr);
+int mlx5_ib_destroy_gsi(struct mlx5_ib_qp *mqp);
int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
int attr_mask);
int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
@@ -1358,7 +1379,7 @@ static inline void init_query_mad(struct ib_smp *mad)
static inline int is_qp1(enum ib_qp_type qp_type)
{
- return qp_type == MLX5_IB_QPT_HW_GSI;
+ return qp_type == MLX5_IB_QPT_HW_GSI || qp_type == IB_QPT_GSI;
}
#define MLX5_MAX_UMR_SHIFT 16
@@ -1442,25 +1463,54 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
struct mlx5_bfreg_info *bfregi, u32 bfregn,
bool dyn_bfreg);
-static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev,
- bool do_modify_atomic, int access_flags)
+static inline bool mlx5_ib_can_load_pas_with_umr(struct mlx5_ib_dev *dev,
+ size_t length)
{
+ /*
+ * umr_check_mkey_mask() rejects MLX5_MKEY_MASK_PAGE_SIZE which is
+ * always set if MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (aka
+ * MLX5_IB_UPD_XLT_ADDR and MLX5_IB_UPD_XLT_ENABLE) is set. Thus, a mkey
+ * can never be enabled without this capability. Simplify this weird
+ * quirky hardware by just saying it can't use PAS lists with UMR at
+ * all.
+ */
if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
return false;
- if (do_modify_atomic &&
+ /*
+ * length is the size of the MR in bytes when mlx5_ib_update_xlt() is
+ * used.
+ */
+ if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) &&
+ length >= MLX5_MAX_UMR_PAGES * PAGE_SIZE)
+ return false;
+ return true;
+}
+
+/*
+ * true if an existing MR can be reconfigured to new access_flags using UMR.
+ * Older HW cannot use UMR to update certain elements of the MKC. See
+ * umr_check_mkey_mask(), get_umr_update_access_mask() and umr_check_mkey_mask()
+ */
+static inline bool mlx5_ib_can_reconfig_with_umr(struct mlx5_ib_dev *dev,
+ unsigned int current_access_flags,
+ unsigned int target_access_flags)
+{
+ unsigned int diffs = current_access_flags ^ target_access_flags;
+
+ if ((diffs & IB_ACCESS_REMOTE_ATOMIC) &&
MLX5_CAP_GEN(dev->mdev, atomic) &&
MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
return false;
- if (access_flags & IB_ACCESS_RELAXED_ORDERING &&
+ if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) &&
!MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
return false;
- if (access_flags & IB_ACCESS_RELAXED_ORDERING &&
- MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) &&
- !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
+ if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
+ MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
return false;
return true;
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 3e6f2f9c6655..b261797b258f 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -50,6 +50,29 @@ enum {
static void
create_mkey_callback(int status, struct mlx5_async_work *context);
+static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
+ struct ib_pd *pd)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+
+ MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
+ MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
+ MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
+ MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
+ MLX5_SET(mkc, mkc, lr, 1);
+
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
+ MLX5_SET(mkc, mkc, relaxed_ordering_write,
+ !!(acc & IB_ACCESS_RELAXED_ORDERING));
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
+ MLX5_SET(mkc, mkc, relaxed_ordering_read,
+ !!(acc & IB_ACCESS_RELAXED_ORDERING));
+
+ MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET64(mkc, mkc, start_addr, start_addr);
+}
+
static void
assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
u32 *in)
@@ -100,7 +123,8 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
}
-static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
+static inline bool mlx5_ib_pas_fits_in_mr(struct mlx5_ib_mr *mr, u64 start,
+ u64 length)
{
return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
@@ -152,12 +176,12 @@ static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
mr->cache_ent = ent;
mr->dev = ent->dev;
+ set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd);
MLX5_SET(mkc, mkc, free, 1);
MLX5_SET(mkc, mkc, umr_en, 1);
MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7);
- MLX5_SET(mkc, mkc, qpn, 0xffffff);
MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
MLX5_SET(mkc, mkc, log_page_size, ent->page);
return mr;
@@ -534,7 +558,7 @@ static void cache_work_func(struct work_struct *work)
/* Allocate a special entry from the cache */
struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
- unsigned int entry)
+ unsigned int entry, int access_flags)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
@@ -544,6 +568,10 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
entry >= ARRAY_SIZE(cache->ent)))
return ERR_PTR(-EINVAL);
+ /* Matches access in alloc_cache_mr() */
+ if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
+ return ERR_PTR(-EOPNOTSUPP);
+
ent = &cache->ent[entry];
spin_lock_irq(&ent->lock);
if (list_empty(&ent->head)) {
@@ -558,6 +586,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
queue_adjust_cache_locked(ent);
spin_unlock_irq(&ent->lock);
}
+ mr->access_flags = access_flags;
return mr;
}
@@ -730,8 +759,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
MLX5_IB_UMR_OCTOWORD;
ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
- !dev->is_rep &&
- mlx5_core_is_pf(dev->mdev))
+ !dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
+ mlx5_ib_can_load_pas_with_umr(dev, 0))
ent->limit = dev->mdev->profile->mr_cache[i].limit;
else
ent->limit = 0;
@@ -774,29 +803,6 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
return 0;
}
-static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
- struct ib_pd *pd)
-{
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
-
- MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
- MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
- MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
- MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
- MLX5_SET(mkc, mkc, lr, 1);
-
- if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
- MLX5_SET(mkc, mkc, relaxed_ordering_write,
- !!(acc & IB_ACCESS_RELAXED_ORDERING));
- if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
- MLX5_SET(mkc, mkc, relaxed_ordering_read,
- !!(acc & IB_ACCESS_RELAXED_ORDERING));
-
- MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
- MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET64(mkc, mkc, start_addr, start_addr);
-}
-
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
@@ -979,6 +985,11 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr,
if (!ent)
return ERR_PTR(-E2BIG);
+
+ /* Matches access in alloc_cache_mr() */
+ if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
+ return ERR_PTR(-EOPNOTSUPP);
+
mr = get_cache_mr(ent);
if (!mr) {
mr = create_cache_mr(ent);
@@ -1181,38 +1192,31 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
goto err_1;
}
pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
- if (populate && !(access_flags & IB_ACCESS_ON_DEMAND))
+ if (populate) {
+ if (WARN_ON(access_flags & IB_ACCESS_ON_DEMAND)) {
+ err = -EINVAL;
+ goto err_2;
+ }
mlx5_ib_populate_pas(dev, umem, page_shift, pas,
pg_cap ? MLX5_IB_MTT_PRESENT : 0);
+ }
/* The pg_access bit allows setting the access flags
* in the page list submitted with the command. */
MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ set_mkc_access_pd_addr_fields(mkc, access_flags, virt_addr,
+ populate ? pd : dev->umrc.pd);
MLX5_SET(mkc, mkc, free, !populate);
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
- if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
- MLX5_SET(mkc, mkc, relaxed_ordering_write,
- !!(access_flags & IB_ACCESS_RELAXED_ORDERING));
- if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
- MLX5_SET(mkc, mkc, relaxed_ordering_read,
- !!(access_flags & IB_ACCESS_RELAXED_ORDERING));
- MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
- MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
- MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
- MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
- MLX5_SET(mkc, mkc, lr, 1);
MLX5_SET(mkc, mkc, umr_en, 1);
- MLX5_SET64(mkc, mkc, start_addr, virt_addr);
MLX5_SET64(mkc, mkc, len, length);
- MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
MLX5_SET(mkc, mkc, bsf_octword_size, 0);
MLX5_SET(mkc, mkc, translations_octword_size,
get_octo_len(virt_addr, length, page_shift));
MLX5_SET(mkc, mkc, log_page_size, page_shift);
- MLX5_SET(mkc, mkc, qpn, 0xffffff);
if (populate) {
MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
get_octo_len(virt_addr, length, page_shift));
@@ -1308,7 +1312,8 @@ int mlx5_ib_advise_mr(struct ib_pd *pd,
struct uverbs_attr_bundle *attrs)
{
if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH &&
- advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE)
+ advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE &&
+ advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT)
return -EOPNOTSUPP;
return mlx5_ib_advise_mr_prefetch(pd, advice, flags,
@@ -1353,7 +1358,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr = NULL;
- bool use_umr;
+ bool xlt_with_umr;
struct ib_umem *umem;
int page_shift;
int npages;
@@ -1367,6 +1372,11 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
start, virt_addr, length, access_flags);
+ xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, length);
+ /* ODP requires xlt update via umr to work. */
+ if (!xlt_with_umr && (access_flags & IB_ACCESS_ON_DEMAND))
+ return ERR_PTR(-EINVAL);
+
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start &&
length == U64_MAX) {
if (virt_addr != start)
@@ -1387,28 +1397,17 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (err < 0)
return ERR_PTR(err);
- use_umr = mlx5_ib_can_use_umr(dev, true, access_flags);
-
- if (order <= mr_cache_max_order(dev) && use_umr) {
+ if (xlt_with_umr) {
mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont,
page_shift, order, access_flags);
- if (PTR_ERR(mr) == -EAGAIN) {
- mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
+ if (IS_ERR(mr))
mr = NULL;
- }
- } else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) {
- if (access_flags & IB_ACCESS_ON_DEMAND) {
- err = -EINVAL;
- pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");
- goto error;
- }
- use_umr = false;
}
if (!mr) {
mutex_lock(&dev->slow_path_mutex);
mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
- page_shift, access_flags, !use_umr);
+ page_shift, access_flags, !xlt_with_umr);
mutex_unlock(&dev->slow_path_mutex);
}
@@ -1422,15 +1421,16 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mr->umem = umem;
set_mr_fields(dev, mr, npages, length, access_flags);
- if (use_umr) {
+ if (xlt_with_umr && !(access_flags & IB_ACCESS_ON_DEMAND)) {
+ /*
+ * If the MR was created with reg_create then it will be
+ * configured properly but left disabled. It is safe to go ahead
+ * and configure it again via UMR while enabling it.
+ */
int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
- if (access_flags & IB_ACCESS_ON_DEMAND)
- update_xlt_flags |= MLX5_IB_UPD_XLT_ZAP;
-
err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift,
update_xlt_flags);
-
if (err) {
dereg_mr(dev, mr);
return ERR_PTR(err);
@@ -1448,6 +1448,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
dereg_mr(dev, mr);
return ERR_PTR(err);
}
+
+ err = mlx5_ib_init_odp_mr(mr, xlt_with_umr);
+ if (err) {
+ dereg_mr(dev, mr);
+ return ERR_PTR(err);
+ }
}
return &mr->ibmr;
@@ -1555,8 +1561,11 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
goto err;
}
- if (!mlx5_ib_can_use_umr(dev, true, access_flags) ||
- (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len))) {
+ if (!mlx5_ib_can_reconfig_with_umr(dev, mr->access_flags,
+ access_flags) ||
+ !mlx5_ib_can_load_pas_with_umr(dev, len) ||
+ (flags & IB_MR_REREG_TRANS &&
+ !mlx5_ib_pas_fits_in_mr(mr, addr, len))) {
/*
* UMR can't be used - MKey needs to be replaced.
*/
@@ -1727,9 +1736,9 @@ static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs,
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ /* This is only used from the kernel, so setting the PD is OK. */
+ set_mkc_access_pd_addr_fields(mkc, 0, 0, pd);
MLX5_SET(mkc, mkc, free, 1);
- MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3);
MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7);
@@ -1973,12 +1982,11 @@ struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
max_num_meta_sg);
}
-struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata)
+int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
{
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_ib_dev *dev = to_mdev(ibmw->device);
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
- struct mlx5_ib_mw *mw = NULL;
+ struct mlx5_ib_mw *mw = to_mmw(ibmw);
u32 *in = NULL;
void *mkc;
int ndescs;
@@ -1991,21 +1999,20 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
if (err)
- return ERR_PTR(err);
+ return err;
if (req.comp_mask || req.reserved1 || req.reserved2)
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
if (udata->inlen > sizeof(req) &&
!ib_is_udata_cleared(udata, sizeof(req),
udata->inlen - sizeof(req)))
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
- mw = kzalloc(sizeof(*mw), GFP_KERNEL);
in = kzalloc(inlen, GFP_KERNEL);
- if (!mw || !in) {
+ if (!in) {
err = -ENOMEM;
goto free;
}
@@ -2014,11 +2021,11 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
MLX5_SET(mkc, mkc, free, 1);
MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
- MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(mkc, mkc, pd, to_mpd(ibmw->pd)->pdn);
MLX5_SET(mkc, mkc, umr_en, 1);
MLX5_SET(mkc, mkc, lr, 1);
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
- MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2)));
+ MLX5_SET(mkc, mkc, en_rinval, !!((ibmw->type == IB_MW_TYPE_2)));
MLX5_SET(mkc, mkc, qpn, 0xffffff);
err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen);
@@ -2026,17 +2033,15 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
goto free;
mw->mmkey.type = MLX5_MKEY_MW;
- mw->ibmw.rkey = mw->mmkey.key;
+ ibmw->rkey = mw->mmkey.key;
mw->ndescs = ndescs;
- resp.response_length = min(offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length), udata->outlen);
+ resp.response_length =
+ min(offsetofend(typeof(resp), response_length), udata->outlen);
if (resp.response_length) {
err = ib_copy_to_udata(udata, &resp, resp.response_length);
- if (err) {
- mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
- goto free;
- }
+ if (err)
+ goto free_mkey;
}
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
@@ -2048,21 +2053,19 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
}
kfree(in);
- return &mw->ibmw;
+ return 0;
free_mkey:
mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
free:
- kfree(mw);
kfree(in);
- return ERR_PTR(err);
+ return err;
}
int mlx5_ib_dealloc_mw(struct ib_mw *mw)
{
struct mlx5_ib_dev *dev = to_mdev(mw->device);
struct mlx5_ib_mw *mmw = to_mmw(mw);
- int err;
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key));
@@ -2073,11 +2076,7 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw)
synchronize_srcu(&dev->odp_srcu);
}
- err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey);
- if (err)
- return err;
- kfree(mmw);
- return 0;
+ return mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey);
}
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index cfd7efab114e..5c853ec1b0d8 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -382,7 +382,7 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
memset(caps, 0, sizeof(*caps));
if (!MLX5_CAP_GEN(dev->mdev, pg) ||
- !mlx5_ib_can_use_umr(dev, true, 0))
+ !mlx5_ib_can_load_pas_with_umr(dev, 0))
return;
caps->general_caps = IB_ODP_SUPPORT;
@@ -476,12 +476,12 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
if (IS_ERR(odp))
return ERR_CAST(odp);
- ret = mr = mlx5_mr_cache_alloc(imr->dev, MLX5_IMR_MTT_CACHE_ENTRY);
+ ret = mr = mlx5_mr_cache_alloc(imr->dev, MLX5_IMR_MTT_CACHE_ENTRY,
+ imr->access_flags);
if (IS_ERR(mr))
goto out_umem;
mr->ibmr.pd = imr->ibmr.pd;
- mr->access_flags = imr->access_flags;
mr->umem = &odp->umem;
mr->ibmr.lkey = mr->mmkey.key;
mr->ibmr.rkey = mr->mmkey.key;
@@ -540,14 +540,13 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
if (IS_ERR(umem_odp))
return ERR_CAST(umem_odp);
- imr = mlx5_mr_cache_alloc(dev, MLX5_IMR_KSM_CACHE_ENTRY);
+ imr = mlx5_mr_cache_alloc(dev, MLX5_IMR_KSM_CACHE_ENTRY, access_flags);
if (IS_ERR(imr)) {
err = PTR_ERR(imr);
goto out_umem;
}
imr->ibmr.pd = &pd->ibpd;
- imr->access_flags = access_flags;
imr->mmkey.iova = 0;
imr->umem = &umem_odp->umem;
imr->ibmr.lkey = imr->mmkey.key;
@@ -666,15 +665,21 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr)
}
#define MLX5_PF_FLAGS_DOWNGRADE BIT(1)
+#define MLX5_PF_FLAGS_SNAPSHOT BIT(2)
+#define MLX5_PF_FLAGS_ENABLE BIT(3)
static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
u64 user_va, size_t bcnt, u32 *bytes_mapped,
u32 flags)
{
int page_shift, ret, np;
bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
- unsigned long current_seq;
u64 access_mask;
u64 start_idx;
+ bool fault = !(flags & MLX5_PF_FLAGS_SNAPSHOT);
+ u32 xlt_flags = MLX5_IB_UPD_XLT_ATOMIC;
+
+ if (flags & MLX5_PF_FLAGS_ENABLE)
+ xlt_flags |= MLX5_IB_UPD_XLT_ENABLE;
page_shift = odp->page_shift;
start_idx = (user_va - ib_umem_start(odp)) >> page_shift;
@@ -683,25 +688,15 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
if (odp->umem.writable && !downgrade)
access_mask |= ODP_WRITE_ALLOWED_BIT;
- current_seq = mmu_interval_read_begin(&odp->notifier);
-
- np = ib_umem_odp_map_dma_pages(odp, user_va, bcnt, access_mask,
- current_seq);
+ np = ib_umem_odp_map_dma_and_lock(odp, user_va, bcnt, access_mask, fault);
if (np < 0)
return np;
- mutex_lock(&odp->umem_mutex);
- if (!mmu_interval_read_retry(&odp->notifier, current_seq)) {
- /*
- * No need to check whether the MTTs really belong to
- * this MR, since ib_umem_odp_map_dma_pages already
- * checks this.
- */
- ret = mlx5_ib_update_xlt(mr, start_idx, np,
- page_shift, MLX5_IB_UPD_XLT_ATOMIC);
- } else {
- ret = -EAGAIN;
- }
+ /*
+ * No need to check whether the MTTs really belong to this MR, since
+ * ib_umem_odp_map_dma_and_lock already checks this.
+ */
+ ret = mlx5_ib_update_xlt(mr, start_idx, np, page_shift, xlt_flags);
mutex_unlock(&odp->umem_mutex);
if (ret < 0) {
@@ -836,6 +831,20 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
flags);
}
+int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable)
+{
+ u32 flags = MLX5_PF_FLAGS_SNAPSHOT;
+ int ret;
+
+ if (enable)
+ flags |= MLX5_PF_FLAGS_ENABLE;
+
+ ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem),
+ mr->umem->address, mr->umem->length, NULL,
+ flags);
+ return ret >= 0 ? 0 : ret;
+}
+
struct pf_frame {
struct pf_frame *next;
u32 key;
@@ -1862,6 +1871,9 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH)
pf_flags |= MLX5_PF_FLAGS_DOWNGRADE;
+ if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT)
+ pf_flags |= MLX5_PF_FLAGS_SNAPSHOT;
+
if (flags & IB_UVERBS_ADVISE_MR_FLAG_FLUSH)
return mlx5_ib_prefetch_sg_list(pd, advice, pf_flags, sg_list,
num_sge);
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 5758dbe64045..600e056798c0 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1477,7 +1477,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_RQN;
resp->tirn = rq->tirn;
resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
- if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner)) {
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2)) {
resp->tir_icm_addr = MLX5_GET(
create_tir_out, out, icm_address_31_0);
resp->tir_icm_addr |=
@@ -1739,7 +1740,8 @@ create_tir:
if (mucontext->devx_uid) {
params->resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
params->resp.tirn = qp->rss_qp.tirn;
- if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner)) {
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2)) {
params->resp.tir_icm_addr =
MLX5_GET(create_tir_out, out, icm_address_31_0);
params->resp.tir_icm_addr |=
@@ -2409,6 +2411,9 @@ static int create_dct(struct mlx5_ib_dev *dev, struct ib_pd *pd,
u32 uidx = params->uidx;
void *dctc;
+ if (mlx5_lag_is_active(dev->mdev) && !MLX5_CAP_GEN(dev->mdev, lag_dct))
+ return -EOPNOTSUPP;
+
qp->dct.in = kzalloc(MLX5_ST_SZ_BYTES(create_dct_in), GFP_KERNEL);
if (!qp->dct.in)
return -ENOMEM;
@@ -2506,18 +2511,6 @@ static int check_valid_flow(struct mlx5_ib_dev *dev, struct ib_pd *pd,
return -EINVAL;
}
- switch (attr->qp_type) {
- case IB_QPT_SMI:
- case MLX5_IB_QPT_HW_GSI:
- case MLX5_IB_QPT_REG_UMR:
- case IB_QPT_GSI:
- mlx5_ib_dbg(dev, "Kernel doesn't support QP type %d\n",
- attr->qp_type);
- return -EINVAL;
- default:
- break;
- }
-
/*
* We don't need to see this warning, it means that kernel code
* missing ib_pd. Placed here to catch developer's mistakes.
@@ -2780,21 +2773,23 @@ static int create_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
goto out;
}
- if (qp->type == MLX5_IB_QPT_DCT) {
+ switch (qp->type) {
+ case MLX5_IB_QPT_DCT:
err = create_dct(dev, pd, qp, params);
- goto out;
- }
-
- if (qp->type == IB_QPT_XRC_TGT) {
+ break;
+ case IB_QPT_XRC_TGT:
err = create_xrc_tgt_qp(dev, qp, params);
- goto out;
+ break;
+ case IB_QPT_GSI:
+ err = mlx5_ib_create_gsi(pd, qp, params->attr);
+ break;
+ default:
+ if (params->udata)
+ err = create_user_qp(dev, pd, qp, params);
+ else
+ err = create_kernel_qp(dev, pd, qp, params);
}
- if (params->udata)
- err = create_user_qp(dev, pd, qp, params);
- else
- err = create_kernel_qp(dev, pd, qp, params);
-
out:
if (err) {
mlx5_ib_err(dev, "Create QP type %d failed\n", qp->type);
@@ -2934,9 +2929,6 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr,
if (err)
return ERR_PTR(err);
- if (attr->qp_type == IB_QPT_GSI)
- return mlx5_ib_gsi_create_qp(pd, attr);
-
params.udata = udata;
params.uidx = MLX5_IB_DEFAULT_UIDX;
params.attr = attr;
@@ -3005,9 +2997,14 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr,
return &qp->ibqp;
destroy_qp:
- if (qp->type == MLX5_IB_QPT_DCT) {
+ switch (qp->type) {
+ case MLX5_IB_QPT_DCT:
mlx5_ib_destroy_dct(qp);
- } else {
+ break;
+ case IB_QPT_GSI:
+ mlx5_ib_destroy_gsi(qp);
+ break;
+ default:
/*
* These lines below are temp solution till QP allocation
* will be moved to be under IB/core responsiblity.
@@ -3032,7 +3029,7 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
struct mlx5_ib_qp *mqp = to_mqp(qp);
if (unlikely(qp->qp_type == IB_QPT_GSI))
- return mlx5_ib_gsi_destroy_qp(qp);
+ return mlx5_ib_destroy_gsi(mqp);
if (mqp->type == MLX5_IB_QPT_DCT)
return mlx5_ib_destroy_dct(mqp);
@@ -3088,20 +3085,44 @@ enum {
MLX5_PATH_FLAG_COUNTER = 1 << 2,
};
+static int ib_to_mlx5_rate_map(u8 rate)
+{
+ switch (rate) {
+ case IB_RATE_PORT_CURRENT:
+ return 0;
+ case IB_RATE_56_GBPS:
+ return 1;
+ case IB_RATE_25_GBPS:
+ return 2;
+ case IB_RATE_100_GBPS:
+ return 3;
+ case IB_RATE_200_GBPS:
+ return 4;
+ case IB_RATE_50_GBPS:
+ return 5;
+ default:
+ return rate + MLX5_STAT_RATE_OFFSET;
+ };
+
+ return 0;
+}
+
static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
{
+ u32 stat_rate_support;
+
if (rate == IB_RATE_PORT_CURRENT)
return 0;
if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_600_GBPS)
return -EINVAL;
+ stat_rate_support = MLX5_CAP_GEN(dev->mdev, stat_rate_support);
while (rate != IB_RATE_PORT_CURRENT &&
- !(1 << (rate + MLX5_STAT_RATE_OFFSET) &
- MLX5_CAP_GEN(dev->mdev, stat_rate_support)))
+ !(1 << ib_to_mlx5_rate_map(rate) & stat_rate_support))
--rate;
- return rate ? rate + MLX5_STAT_RATE_OFFSET : rate;
+ return ib_to_mlx5_rate_map(rate);
}
static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
@@ -3643,14 +3664,12 @@ static unsigned int get_tx_affinity_rr(struct mlx5_ib_dev *dev,
MLX5_MAX_PORTS + 1;
}
-static bool qp_supports_affinity(struct ib_qp *qp)
+static bool qp_supports_affinity(struct mlx5_ib_qp *qp)
{
- if ((qp->qp_type == IB_QPT_RC) ||
- (qp->qp_type == IB_QPT_UD) ||
- (qp->qp_type == IB_QPT_UC) ||
- (qp->qp_type == IB_QPT_RAW_PACKET) ||
- (qp->qp_type == IB_QPT_XRC_INI) ||
- (qp->qp_type == IB_QPT_XRC_TGT))
+ if ((qp->type == IB_QPT_RC) || (qp->type == IB_QPT_UD) ||
+ (qp->type == IB_QPT_UC) || (qp->type == IB_QPT_RAW_PACKET) ||
+ (qp->type == IB_QPT_XRC_INI) || (qp->type == IB_QPT_XRC_TGT) ||
+ (qp->type == MLX5_IB_QPT_DCI))
return true;
return false;
}
@@ -3668,7 +3687,7 @@ static unsigned int get_tx_affinity(struct ib_qp *qp,
unsigned int tx_affinity;
if (!(mlx5_ib_lag_should_assign_affinity(dev) &&
- qp_supports_affinity(qp)))
+ qp_supports_affinity(mqp)))
return 0;
if (mqp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)
@@ -4161,7 +4180,11 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
MLX5_SET(dctc, dctc, rae, 1);
}
MLX5_SET(dctc, dctc, pkey_index, attr->pkey_index);
- MLX5_SET(dctc, dctc, port, attr->port_num);
+ if (mlx5_lag_is_active(dev->mdev))
+ MLX5_SET(dctc, dctc, port,
+ get_tx_affinity_rr(dev, udata));
+ else
+ MLX5_SET(dctc, dctc, port, attr->port_num);
set_id = mlx5_ib_get_counters_id(dev, attr->port_num - 1);
MLX5_SET(dctc, dctc, counter_set_id, set_id);
@@ -4716,12 +4739,12 @@ int mlx5_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
return mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0);
}
-void mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
+int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
- mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0);
+ return mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0);
}
static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type)
@@ -4921,8 +4944,8 @@ static int prepare_user_rq(struct ib_pd *pd,
int err;
size_t required_cmd_sz;
- required_cmd_sz = offsetof(typeof(ucmd), single_stride_log_num_of_bytes)
- + sizeof(ucmd.single_stride_log_num_of_bytes);
+ required_cmd_sz = offsetofend(struct mlx5_ib_create_wq,
+ single_stride_log_num_of_bytes);
if (udata->inlen < required_cmd_sz) {
mlx5_ib_dbg(dev, "invalid inlen\n");
return -EINVAL;
@@ -5006,7 +5029,7 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
if (!udata)
return ERR_PTR(-ENOSYS);
- min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
+ min_resp_len = offsetofend(struct mlx5_ib_create_wq_resp, reserved);
if (udata->outlen && udata->outlen < min_resp_len)
return ERR_PTR(-EINVAL);
@@ -5036,8 +5059,8 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
rwq->ibwq.wq_num = rwq->core_qp.qpn;
rwq->ibwq.state = IB_WQS_RESET;
if (udata->outlen) {
- resp.response_length = offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length);
+ resp.response_length = offsetofend(
+ struct mlx5_ib_create_wq_resp, response_length);
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
goto err_copy;
@@ -5056,22 +5079,27 @@ err:
return ERR_PTR(err);
}
-void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
+int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(wq->device);
struct mlx5_ib_rwq *rwq = to_mrwq(wq);
+ int ret;
- mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp);
+ ret = mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp);
+ if (ret)
+ return ret;
destroy_user_rq(dev, wq->pd, rwq, udata);
kfree(rwq);
+ return 0;
}
-struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
- struct ib_rwq_ind_table_init_attr *init_attr,
- struct ib_udata *udata)
+int mlx5_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata)
{
- struct mlx5_ib_dev *dev = to_mdev(device);
- struct mlx5_ib_rwq_ind_table *rwq_ind_tbl;
+ struct mlx5_ib_rwq_ind_table *rwq_ind_tbl =
+ to_mrwq_ind_table(ib_rwq_ind_table);
+ struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_table->device);
int sz = 1 << init_attr->log_ind_tbl_size;
struct mlx5_ib_create_rwq_ind_tbl_resp resp = {};
size_t min_resp_len;
@@ -5084,30 +5112,25 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
if (udata->inlen > 0 &&
!ib_is_udata_cleared(udata, 0,
udata->inlen))
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
if (init_attr->log_ind_tbl_size >
MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)) {
mlx5_ib_dbg(dev, "log_ind_tbl_size = %d is bigger than supported = %d\n",
init_attr->log_ind_tbl_size,
MLX5_CAP_GEN(dev->mdev, log_max_rqt_size));
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
- min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
+ min_resp_len =
+ offsetofend(struct mlx5_ib_create_rwq_ind_tbl_resp, reserved);
if (udata->outlen && udata->outlen < min_resp_len)
- return ERR_PTR(-EINVAL);
-
- rwq_ind_tbl = kzalloc(sizeof(*rwq_ind_tbl), GFP_KERNEL);
- if (!rwq_ind_tbl)
- return ERR_PTR(-ENOMEM);
+ return -EINVAL;
inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
in = kvzalloc(inlen, GFP_KERNEL);
- if (!in) {
- err = -ENOMEM;
- goto err;
- }
+ if (!in)
+ return -ENOMEM;
rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
@@ -5122,26 +5145,24 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn);
kvfree(in);
-
if (err)
- goto err;
+ return err;
rwq_ind_tbl->ib_rwq_ind_tbl.ind_tbl_num = rwq_ind_tbl->rqtn;
if (udata->outlen) {
- resp.response_length = offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length);
+ resp.response_length =
+ offsetofend(struct mlx5_ib_create_rwq_ind_tbl_resp,
+ response_length);
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
goto err_copy;
}
- return &rwq_ind_tbl->ib_rwq_ind_tbl;
+ return 0;
err_copy:
mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
-err:
- kfree(rwq_ind_tbl);
- return ERR_PTR(err);
+ return err;
}
int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
@@ -5149,10 +5170,7 @@ int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl);
struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device);
- mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
-
- kfree(rwq_ind_tbl);
- return 0;
+ return mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
}
int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
@@ -5169,7 +5187,7 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
void *rqc;
void *in;
- required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved);
+ required_cmd_sz = offsetofend(struct mlx5_ib_modify_wq, reserved);
if (udata->inlen < required_cmd_sz)
return -EINVAL;
diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h
index ba899df44c5b..5d4e140db99c 100644
--- a/drivers/infiniband/hw/mlx5/qp.h
+++ b/drivers/infiniband/hw/mlx5/qp.h
@@ -26,8 +26,8 @@ int mlx5_core_dct_query(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct,
int mlx5_core_set_delay_drop(struct mlx5_ib_dev *dev, u32 timeout_usec);
-void mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
- struct mlx5_core_qp *rq);
+int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
+ struct mlx5_core_qp *rq);
int mlx5_core_create_sq_tracked(struct mlx5_ib_dev *dev, u32 *in, int inlen,
struct mlx5_core_qp *sq);
void mlx5_core_destroy_sq_tracked(struct mlx5_ib_dev *dev,
diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c
index 7c3968ef9cd1..c683d7000168 100644
--- a/drivers/infiniband/hw/mlx5/qpc.c
+++ b/drivers/infiniband/hw/mlx5/qpc.c
@@ -576,11 +576,12 @@ err_destroy_rq:
return err;
}
-void mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
- struct mlx5_core_qp *rq)
+int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
+ struct mlx5_core_qp *rq)
{
destroy_resource_common(dev, rq);
destroy_rq_tracked(dev, rq->qpn, rq->uid);
+ return 0;
}
static void destroy_sq_tracked(struct mlx5_ib_dev *dev, u32 sqn, u16 uid)
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 7e10cbcb6d5c..e2f720eec1e1 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -389,24 +389,21 @@ out_box:
return ret;
}
-void mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
+int mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(srq->device);
struct mlx5_ib_srq *msrq = to_msrq(srq);
+ int ret;
+
+ ret = mlx5_cmd_destroy_srq(dev, &msrq->msrq);
+ if (ret)
+ return ret;
- mlx5_cmd_destroy_srq(dev, &msrq->msrq);
-
- if (srq->uobject) {
- mlx5_ib_db_unmap_user(
- rdma_udata_to_drv_context(
- udata,
- struct mlx5_ib_ucontext,
- ibucontext),
- &msrq->db);
- ib_umem_release(msrq->umem);
- } else {
+ if (udata)
+ destroy_srq_user(srq->pd, msrq, udata);
+ else
destroy_srq_kernel(dev, msrq);
- }
+ return 0;
}
void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index)
diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h
index af197c36d757..2c3627b2509d 100644
--- a/drivers/infiniband/hw/mlx5/srq.h
+++ b/drivers/infiniband/hw/mlx5/srq.h
@@ -56,7 +56,7 @@ struct mlx5_srq_table {
int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *in);
-void mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq);
+int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq);
int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *out);
int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c
index 37aaacebd3f2..db889ec3fd48 100644
--- a/drivers/infiniband/hw/mlx5/srq_cmd.c
+++ b/drivers/infiniband/hw/mlx5/srq_cmd.c
@@ -590,22 +590,32 @@ err_destroy_srq_split:
return err;
}
-void mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
+int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
{
struct mlx5_srq_table *table = &dev->srq_table;
struct mlx5_core_srq *tmp;
int err;
- tmp = xa_erase_irq(&table->array, srq->srqn);
- if (!tmp || tmp != srq)
- return;
+ /* Delete entry, but leave index occupied */
+ tmp = xa_cmpxchg_irq(&table->array, srq->srqn, srq, XA_ZERO_ENTRY, 0);
+ if (WARN_ON(tmp != srq))
+ return xa_err(tmp) ?: -EINVAL;
err = destroy_srq_split(dev, srq);
- if (err)
- return;
+ if (err) {
+ /*
+ * We don't need to check returned result for an error,
+ * because we are storing in pre-allocated space xarray
+ * entry and it can't fail at this stage.
+ */
+ xa_cmpxchg_irq(&table->array, srq->srqn, XA_ZERO_ENTRY, srq, 0);
+ return err;
+ }
+ xa_erase_irq(&table->array, srq->srqn);
mlx5_core_res_put(&srq->common);
wait_for_completion(&srq->common.free);
+ return 0;
}
int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c
index 43880973a512..d6038fb6c50c 100644
--- a/drivers/infiniband/hw/mlx5/wr.c
+++ b/drivers/infiniband/hw/mlx5/wr.c
@@ -398,7 +398,8 @@ static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
seg->status = MLX5_MKEY_STATUS_FREE;
}
-static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg,
+static void set_reg_mkey_segment(struct mlx5_ib_dev *dev,
+ struct mlx5_mkey_seg *seg,
const struct ib_send_wr *wr)
{
const struct mlx5_umr_wr *umrwr = umr_wr(wr);
@@ -414,10 +415,12 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg,
MLX5_SET(mkc, seg, rr, !!(umrwr->access_flags & IB_ACCESS_REMOTE_READ));
MLX5_SET(mkc, seg, lw, !!(umrwr->access_flags & IB_ACCESS_LOCAL_WRITE));
MLX5_SET(mkc, seg, lr, 1);
- MLX5_SET(mkc, seg, relaxed_ordering_write,
- !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
- MLX5_SET(mkc, seg, relaxed_ordering_read,
- !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
+ MLX5_SET(mkc, seg, relaxed_ordering_write,
+ !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
+ MLX5_SET(mkc, seg, relaxed_ordering_read,
+ !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
if (umrwr->pd)
MLX5_SET(mkc, seg, pd, to_mpd(umrwr->pd)->pdn);
@@ -863,13 +866,11 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC;
u8 flags = 0;
- if (!mlx5_ib_can_use_umr(dev, atomic, wr->access)) {
- mlx5_ib_warn(to_mdev(qp->ibqp.device),
- "Fast update of %s for MR is disabled\n",
- (MLX5_CAP_GEN(dev->mdev,
- umr_modify_entity_size_disabled)) ?
- "entity size" :
- "atomic access");
+ /* Matches access in mlx5_set_umr_free_mkey() */
+ if (!mlx5_ib_can_reconfig_with_umr(dev, 0, wr->access)) {
+ mlx5_ib_warn(
+ to_mdev(qp->ibqp.device),
+ "Fast update for MR access flags is not possible\n");
return -EINVAL;
}
@@ -1263,7 +1264,7 @@ static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
- set_reg_mkey_segment(*seg, wr);
+ set_reg_mkey_segment(dev, *seg, wr);
*seg += sizeof(struct mlx5_mkey_seg);
*size += sizeof(struct mlx5_mkey_seg) / 16;
handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 7550e9d03dec..9dbbf4d16796 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -548,7 +548,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
struct ib_qp_cap *cap,
int qpn,
int port,
- struct mthca_sqp *sqp,
+ struct mthca_qp *qp,
struct ib_udata *udata);
void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp);
int mthca_create_ah(struct mthca_dev *dev,
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 9fa2f9164a47..c4d9cdc4ee97 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -373,9 +373,10 @@ static int mthca_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
return 0;
}
-static void mthca_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+static int mthca_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
mthca_pd_free(to_mdev(pd->device), to_mpd(pd));
+ return 0;
}
static int mthca_ah_create(struct ib_ah *ibah,
@@ -389,9 +390,10 @@ static int mthca_ah_create(struct ib_ah *ibah,
init_attr->ah_attr, ah);
}
-static void mthca_ah_destroy(struct ib_ah *ah, u32 flags)
+static int mthca_ah_destroy(struct ib_ah *ah, u32 flags)
{
mthca_destroy_ah(to_mdev(ah->device), to_mah(ah));
+ return 0;
}
static int mthca_create_srq(struct ib_srq *ibsrq,
@@ -440,7 +442,7 @@ static int mthca_create_srq(struct ib_srq *ibsrq,
return 0;
}
-static void mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
+static int mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
{
if (udata) {
struct mthca_ucontext *context =
@@ -454,6 +456,7 @@ static void mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
}
mthca_free_srq(to_mdev(srq->device), to_msrq(srq));
+ return 0;
}
static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
@@ -532,13 +535,14 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
case IB_QPT_SMI:
case IB_QPT_GSI:
{
- /* Don't allow userspace to create special QPs */
- if (udata)
- return ERR_PTR(-EINVAL);
-
- qp = kzalloc(sizeof(struct mthca_sqp), GFP_KERNEL);
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
+ qp->sqp = kzalloc(sizeof(struct mthca_sqp), GFP_KERNEL);
+ if (!qp->sqp) {
+ kfree(qp);
+ return ERR_PTR(-ENOMEM);
+ }
qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
@@ -547,7 +551,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
to_mcq(init_attr->recv_cq),
init_attr->sq_sig_type, &init_attr->cap,
qp->ibqp.qp_num, init_attr->port_num,
- to_msqp(qp), udata);
+ qp, udata);
break;
}
default:
@@ -556,6 +560,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
}
if (err) {
+ kfree(qp->sqp);
kfree(qp);
return ERR_PTR(err);
}
@@ -588,7 +593,8 @@ static int mthca_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
to_mqp(qp)->rq.db_index);
}
mthca_free_qp(to_mdev(qp->device), to_mqp(qp));
- kfree(qp);
+ kfree(to_mqp(qp)->sqp);
+ kfree(to_mqp(qp));
return 0;
}
@@ -789,7 +795,7 @@ out:
return ret;
}
-static void mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+static int mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
if (udata) {
struct mthca_ucontext *context =
@@ -808,6 +814,7 @@ static void mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
to_mcq(cq)->set_ci_db_index);
}
mthca_free_cq(to_mdev(cq->device), to_mcq(cq));
+ return 0;
}
static inline u32 convert_access(int acc)
@@ -846,7 +853,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata)
{
struct mthca_dev *dev = to_mdev(pd->device);
- struct sg_dma_page_iter sg_iter;
+ struct ib_block_iter biter;
struct mthca_ucontext *context = rdma_udata_to_drv_context(
udata, struct mthca_ucontext, ibucontext);
struct mthca_mr *mr;
@@ -877,7 +884,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto err;
}
- n = ib_umem_num_pages(mr->umem);
+ n = ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE);
mr->mtt = mthca_alloc_mtt(dev, n);
if (IS_ERR(mr->mtt)) {
@@ -895,8 +902,8 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
- for_each_sg_dma_page(mr->umem->sg_head.sgl, &sg_iter, mr->umem->nmap, 0) {
- pages[i++] = sg_page_iter_dma_address(&sg_iter);
+ rdma_umem_for_each_dma_block(mr->umem, &biter, PAGE_SIZE) {
+ pages[i++] = rdma_block_iter_dma_address(&biter);
/*
* Be friendly to write_mtt and pass it chunks
@@ -1199,7 +1206,7 @@ int mthca_register_device(struct mthca_dev *dev)
mutex_init(&dev->cap_mask_mutex);
rdma_set_device_sysfs_group(&dev->ib_dev, &mthca_attr_group);
- ret = ib_register_device(&dev->ib_dev, "mthca%d");
+ ret = ib_register_device(&dev->ib_dev, "mthca%d", &dev->pdev->dev);
if (ret)
return ret;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 84c64bff0d92..8a77483bb33c 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -240,6 +240,16 @@ struct mthca_wq {
__be32 *db;
};
+struct mthca_sqp {
+ int pkey_index;
+ u32 qkey;
+ u32 send_psn;
+ struct ib_ud_header ud_header;
+ int header_buf_size;
+ void *header_buf;
+ dma_addr_t header_dma;
+};
+
struct mthca_qp {
struct ib_qp ibqp;
int refcount;
@@ -265,17 +275,7 @@ struct mthca_qp {
wait_queue_head_t wait;
struct mutex mutex;
-};
-
-struct mthca_sqp {
- struct mthca_qp qp;
- int pkey_index;
- u32 qkey;
- u32 send_psn;
- struct ib_ud_header ud_header;
- int header_buf_size;
- void *header_buf;
- dma_addr_t header_dma;
+ struct mthca_sqp *sqp;
};
static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -313,9 +313,4 @@ static inline struct mthca_qp *to_mqp(struct ib_qp *ibqp)
return container_of(ibqp, struct mthca_qp, ibqp);
}
-static inline struct mthca_sqp *to_msqp(struct mthca_qp *qp)
-{
- return container_of(qp, struct mthca_sqp, qp);
-}
-
#endif /* MTHCA_PROVIDER_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index c6e95d0d760a..08a2a7afafd3 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -809,7 +809,7 @@ static int __mthca_modify_qp(struct ib_qp *ibqp,
qp->alt_port = attr->alt_port_num;
if (is_sqp(dev, qp))
- store_attrs(to_msqp(qp), attr, attr_mask);
+ store_attrs(qp->sqp, attr, attr_mask);
/*
* If we moved QP0 to RTR, bring the IB link up; if we moved
@@ -1368,39 +1368,40 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
struct ib_qp_cap *cap,
int qpn,
int port,
- struct mthca_sqp *sqp,
+ struct mthca_qp *qp,
struct ib_udata *udata)
{
u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
int err;
- sqp->qp.transport = MLX;
- err = mthca_set_qp_size(dev, cap, pd, &sqp->qp);
+ qp->transport = MLX;
+ err = mthca_set_qp_size(dev, cap, pd, qp);
if (err)
return err;
- sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE;
- sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size,
- &sqp->header_dma, GFP_KERNEL);
- if (!sqp->header_buf)
+ qp->sqp->header_buf_size = qp->sq.max * MTHCA_UD_HEADER_SIZE;
+ qp->sqp->header_buf =
+ dma_alloc_coherent(&dev->pdev->dev, qp->sqp->header_buf_size,
+ &qp->sqp->header_dma, GFP_KERNEL);
+ if (!qp->sqp->header_buf)
return -ENOMEM;
spin_lock_irq(&dev->qp_table.lock);
if (mthca_array_get(&dev->qp_table.qp, mqpn))
err = -EBUSY;
else
- mthca_array_set(&dev->qp_table.qp, mqpn, sqp);
+ mthca_array_set(&dev->qp_table.qp, mqpn, qp->sqp);
spin_unlock_irq(&dev->qp_table.lock);
if (err)
goto err_out;
- sqp->qp.port = port;
- sqp->qp.qpn = mqpn;
- sqp->qp.transport = MLX;
+ qp->port = port;
+ qp->qpn = mqpn;
+ qp->transport = MLX;
err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
- send_policy, &sqp->qp, udata);
+ send_policy, qp, udata);
if (err)
goto err_out_free;
@@ -1421,10 +1422,9 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
mthca_unlock_cqs(send_cq, recv_cq);
- err_out:
- dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size,
- sqp->header_buf, sqp->header_dma);
-
+err_out:
+ dma_free_coherent(&dev->pdev->dev, qp->sqp->header_buf_size,
+ qp->sqp->header_buf, qp->sqp->header_dma);
return err;
}
@@ -1487,20 +1487,19 @@ void mthca_free_qp(struct mthca_dev *dev,
if (is_sqp(dev, qp)) {
atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count));
- dma_free_coherent(&dev->pdev->dev,
- to_msqp(qp)->header_buf_size,
- to_msqp(qp)->header_buf,
- to_msqp(qp)->header_dma);
+ dma_free_coherent(&dev->pdev->dev, qp->sqp->header_buf_size,
+ qp->sqp->header_buf, qp->sqp->header_dma);
} else
mthca_free(&dev->qp_table.alloc, qp->qpn);
}
/* Create UD header for an MLX send and build a data segment for it */
-static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
- int ind, const struct ib_ud_wr *wr,
+static int build_mlx_header(struct mthca_dev *dev, struct mthca_qp *qp, int ind,
+ const struct ib_ud_wr *wr,
struct mthca_mlx_seg *mlx,
struct mthca_data_seg *data)
{
+ struct mthca_sqp *sqp = qp->sqp;
int header_size;
int err;
u16 pkey;
@@ -1513,7 +1512,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
if (err)
return err;
mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1);
- mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) |
+ mlx->flags |= cpu_to_be32((!qp->ibqp.qp_num ? MTHCA_MLX_VL15 : 0) |
(sqp->ud_header.lrh.destination_lid ==
IB_LID_PERMISSIVE ? MTHCA_MLX_SLR : 0) |
(sqp->ud_header.lrh.service_level << 8));
@@ -1534,29 +1533,29 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
return -EINVAL;
}
- sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
+ sqp->ud_header.lrh.virtual_lane = !qp->ibqp.qp_num ? 15 : 0;
if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
- if (!sqp->qp.ibqp.qp_num)
- ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
- sqp->pkey_index, &pkey);
+ if (!qp->ibqp.qp_num)
+ ib_get_cached_pkey(&dev->ib_dev, qp->port, sqp->pkey_index,
+ &pkey);
else
- ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
- wr->pkey_index, &pkey);
+ ib_get_cached_pkey(&dev->ib_dev, qp->port, wr->pkey_index,
+ &pkey);
sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ?
sqp->qkey : wr->remote_qkey);
- sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
+ sqp->ud_header.deth.source_qpn = cpu_to_be32(qp->ibqp.qp_num);
header_size = ib_ud_header_pack(&sqp->ud_header,
sqp->header_buf +
ind * MTHCA_UD_HEADER_SIZE);
data->byte_count = cpu_to_be32(header_size);
- data->lkey = cpu_to_be32(to_mpd(sqp->qp.ibqp.pd)->ntmr.ibmr.lkey);
+ data->lkey = cpu_to_be32(to_mpd(qp->ibqp.pd)->ntmr.ibmr.lkey);
data->addr = cpu_to_be64(sqp->header_dma +
ind * MTHCA_UD_HEADER_SIZE);
@@ -1735,9 +1734,9 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
break;
case MLX:
- err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr),
- wqe - sizeof (struct mthca_next_seg),
- wqe);
+ err = build_mlx_header(
+ dev, qp, ind, ud_wr(wr),
+ wqe - sizeof(struct mthca_next_seg), wqe);
if (err) {
*bad_wr = wr;
goto out;
@@ -2065,9 +2064,9 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
break;
case MLX:
- err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr),
- wqe - sizeof (struct mthca_next_seg),
- wqe);
+ err = build_mlx_header(
+ dev, qp, ind, ud_wr(wr),
+ wqe - sizeof(struct mthca_next_seg), wqe);
if (err) {
*bad_wr = wr;
goto out;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index fcfe0e82197a..5eb61c110090 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -185,7 +185,6 @@ struct ocrdma_hw_mr {
u32 num_pbes;
u32 pbl_size;
u32 pbe_size;
- u64 fbo;
u64 va;
};
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index 6eea02b18968..699a8b719ed6 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -215,12 +215,13 @@ av_err:
return status;
}
-void ocrdma_destroy_ah(struct ib_ah *ibah, u32 flags)
+int ocrdma_destroy_ah(struct ib_ah *ibah, u32 flags)
{
struct ocrdma_ah *ah = get_ocrdma_ah(ibah);
struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device);
ocrdma_free_av(dev, ah);
+ return 0;
}
int ocrdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
index 8b73b3489f3a..35cf2e2ff391 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
@@ -53,7 +53,7 @@ enum {
int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
-void ocrdma_destroy_ah(struct ib_ah *ah, u32 flags);
+int ocrdma_destroy_ah(struct ib_ah *ah, u32 flags);
int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int ocrdma_process_mad(struct ib_device *dev, int process_mad_flags,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index e07bf0b2209a..c51c3f40700e 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -1962,6 +1962,7 @@ static int ocrdma_mbx_reg_mr(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr,
int i;
struct ocrdma_reg_nsmr *cmd;
struct ocrdma_reg_nsmr_rsp *rsp;
+ u64 fbo = hwmr->va & (hwmr->pbe_size - 1);
cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_REGISTER_NSMR, sizeof(*cmd));
if (!cmd)
@@ -1987,8 +1988,8 @@ static int ocrdma_mbx_reg_mr(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr,
OCRDMA_REG_NSMR_HPAGE_SIZE_SHIFT;
cmd->totlen_low = hwmr->len;
cmd->totlen_high = upper_32_bits(hwmr->len);
- cmd->fbo_low = (u32) (hwmr->fbo & 0xffffffff);
- cmd->fbo_high = (u32) upper_32_bits(hwmr->fbo);
+ cmd->fbo_low = lower_32_bits(fbo);
+ cmd->fbo_high = upper_32_bits(fbo);
cmd->va_loaddr = (u32) hwmr->va;
cmd->va_hiaddr = (u32) upper_32_bits(hwmr->va);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index d8c47d24d6d6..9b96661a7143 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -255,7 +255,9 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
if (ret)
return ret;
- return ib_register_device(&dev->ibdev, "ocrdma%d");
+ dma_set_max_seg_size(&dev->nic_info.pdev->dev, UINT_MAX);
+ return ib_register_device(&dev->ibdev, "ocrdma%d",
+ &dev->nic_info.pdev->dev);
}
static int ocrdma_alloc_resources(struct ocrdma_dev *dev)
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index c1751c9a0f62..7350fe16f164 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -112,7 +112,7 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
}
static inline void get_link_speed_and_width(struct ocrdma_dev *dev,
- u8 *ib_speed, u8 *ib_width)
+ u16 *ib_speed, u8 *ib_width)
{
int status;
u8 speed;
@@ -664,7 +664,7 @@ exit:
return status;
}
-void ocrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+int ocrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
@@ -682,10 +682,11 @@ void ocrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
if (is_ucontext_pd(uctx, pd)) {
ocrdma_release_ucontext_pd(uctx);
- return;
+ return 0;
}
}
_ocrdma_dealloc_pd(dev, pd);
+ return 0;
}
static int ocrdma_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
@@ -810,14 +811,12 @@ static int ocrdma_build_pbl_tbl(struct ocrdma_dev *dev, struct ocrdma_hw_mr *mr)
return status;
}
-static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
- u32 num_pbes)
+static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr)
{
struct ocrdma_pbe *pbe;
- struct sg_dma_page_iter sg_iter;
+ struct ib_block_iter biter;
struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
- struct ib_umem *umem = mr->umem;
- int pbe_cnt, total_num_pbes = 0;
+ int pbe_cnt;
u64 pg_addr;
if (!mr->hwmr.num_pbes)
@@ -826,19 +825,14 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
pbe = (struct ocrdma_pbe *)pbl_tbl->va;
pbe_cnt = 0;
- for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
+ rdma_umem_for_each_dma_block (mr->umem, &biter, PAGE_SIZE) {
/* store the page address in pbe */
- pg_addr = sg_page_iter_dma_address(&sg_iter);
+ pg_addr = rdma_block_iter_dma_address(&biter);
pbe->pa_lo = cpu_to_le32(pg_addr);
pbe->pa_hi = cpu_to_le32(upper_32_bits(pg_addr));
pbe_cnt += 1;
- total_num_pbes += 1;
pbe++;
- /* if done building pbes, issue the mbx cmd. */
- if (total_num_pbes == num_pbes)
- return;
-
/* if the given pbl is full storing the pbes,
* move to next pbl.
*/
@@ -857,7 +851,6 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
struct ocrdma_mr *mr;
struct ocrdma_pd *pd;
- u32 num_pbes;
pd = get_ocrdma_pd(ibpd);
@@ -872,13 +865,12 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
status = -EFAULT;
goto umem_err;
}
- num_pbes = ib_umem_page_count(mr->umem);
- status = ocrdma_get_pbl_info(dev, mr, num_pbes);
+ status = ocrdma_get_pbl_info(
+ dev, mr, ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE));
if (status)
goto umem_err;
mr->hwmr.pbe_size = PAGE_SIZE;
- mr->hwmr.fbo = ib_umem_offset(mr->umem);
mr->hwmr.va = usr_addr;
mr->hwmr.len = len;
mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
@@ -889,7 +881,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
if (status)
goto umem_err;
- build_user_pbes(dev, mr, num_pbes);
+ build_user_pbes(dev, mr);
status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
if (status)
goto mbx_err;
@@ -1056,7 +1048,7 @@ static void ocrdma_flush_cq(struct ocrdma_cq *cq)
spin_unlock_irqrestore(&cq->cq_lock, flags);
}
-void ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
struct ocrdma_eq *eq = NULL;
@@ -1081,6 +1073,7 @@ void ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
ocrdma_get_db_addr(dev, pdid),
dev->nic_info.db_page_size);
}
+ return 0;
}
static int ocrdma_add_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
@@ -1857,7 +1850,7 @@ int ocrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
return status;
}
-void ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+int ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
struct ocrdma_srq *srq;
struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device);
@@ -1872,6 +1865,7 @@ void ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
kfree(srq->idx_bit_fields);
kfree(srq->rqe_wr_id_tbl);
+ return 0;
}
/* unprivileged verbs and their support functions. */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index df8e3b923a44..425d554e7f3f 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -67,12 +67,12 @@ void ocrdma_dealloc_ucontext(struct ib_ucontext *uctx);
int ocrdma_mmap(struct ib_ucontext *, struct vm_area_struct *vma);
int ocrdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-void ocrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
+int ocrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
int ocrdma_resize_cq(struct ib_cq *, int cqe, struct ib_udata *);
-void ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
struct ib_qp *ocrdma_create_qp(struct ib_pd *,
struct ib_qp_init_attr *attrs,
@@ -92,7 +92,7 @@ int ocrdma_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *attr,
int ocrdma_modify_srq(struct ib_srq *, struct ib_srq_attr *,
enum ib_srq_attr_mask, struct ib_udata *);
int ocrdma_query_srq(struct ib_srq *, struct ib_srq_attr *);
-void ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
+int ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
int ocrdma_post_srq_recv(struct ib_srq *, const struct ib_recv_wr *,
const struct ib_recv_wr **bad_recv_wr);
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index d85f992bac29..967641662b24 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -177,6 +177,8 @@ static int qedr_iw_register_device(struct qedr_dev *dev)
}
static const struct ib_device_ops qedr_roce_dev_ops = {
+ .alloc_xrcd = qedr_alloc_xrcd,
+ .dealloc_xrcd = qedr_dealloc_xrcd,
.get_port_immutable = qedr_roce_port_immutable,
.query_pkey = qedr_query_pkey,
};
@@ -186,6 +188,10 @@ static void qedr_roce_register_device(struct qedr_dev *dev)
dev->ibdev.node_type = RDMA_NODE_IB_CA;
ib_set_device_ops(&dev->ibdev, &qedr_roce_dev_ops);
+
+ dev->ibdev.uverbs_cmd_mask |= QEDR_UVERBS(OPEN_XRCD) |
+ QEDR_UVERBS(CLOSE_XRCD) |
+ QEDR_UVERBS(CREATE_XSRQ);
}
static const struct ib_device_ops qedr_dev_ops = {
@@ -232,6 +238,7 @@ static const struct ib_device_ops qedr_dev_ops = {
INIT_RDMA_OBJ_SIZE(ib_cq, qedr_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, qedr_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_srq, qedr_srq, ibsrq),
+ INIT_RDMA_OBJ_SIZE(ib_xrcd, qedr_xrcd, ibxrcd),
INIT_RDMA_OBJ_SIZE(ib_ucontext, qedr_ucontext, ibucontext),
};
@@ -286,7 +293,8 @@ static int qedr_register_device(struct qedr_dev *dev)
if (rc)
return rc;
- return ib_register_device(&dev->ibdev, "qedr%d");
+ dma_set_max_seg_size(&dev->pdev->dev, UINT_MAX);
+ return ib_register_device(&dev->ibdev, "qedr%d", &dev->pdev->dev);
}
/* This function allocates fast-path status block memory */
@@ -602,7 +610,7 @@ static int qedr_set_device_attr(struct qedr_dev *dev)
qed_attr = dev->ops->rdma_query_device(dev->rdma_ctx);
/* Part 2 - check capabilities */
- page_size = ~dev->attr.page_size_caps + 1;
+ page_size = ~qed_attr->page_size_caps + 1;
if (page_size > PAGE_SIZE) {
DP_ERR(dev,
"Kernel PAGE_SIZE is %ld which is smaller than minimum page size (%d) required by qedr\n",
@@ -705,6 +713,18 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle)
event.event = IB_EVENT_SRQ_ERR;
event_type = EVENT_TYPE_SRQ;
break;
+ case ROCE_ASYNC_EVENT_XRC_DOMAIN_ERR:
+ event.event = IB_EVENT_QP_ACCESS_ERR;
+ event_type = EVENT_TYPE_QP;
+ break;
+ case ROCE_ASYNC_EVENT_INVALID_XRCETH_ERR:
+ event.event = IB_EVENT_QP_ACCESS_ERR;
+ event_type = EVENT_TYPE_QP;
+ break;
+ case ROCE_ASYNC_EVENT_XRC_SRQ_CATASTROPHIC_ERR:
+ event.event = IB_EVENT_CQ_ERR;
+ event_type = EVENT_TYPE_CQ;
+ break;
default:
DP_ERR(dev, "unsupported event %d on handle=%llx\n",
e_code, roce_handle64);
@@ -1026,6 +1046,13 @@ static void qedr_notify(struct qedr_dev *dev, enum qede_rdma_event event)
case QEDE_CHANGE_ADDR:
qedr_mac_address_change(dev);
break;
+ case QEDE_CHANGE_MTU:
+ if (rdma_protocol_iwarp(&dev->ibdev, 1))
+ if (dev->ndev->mtu != dev->iwarp_max_mtu)
+ DP_NOTICE(dev,
+ "Mtu was changed from %d to %d. This will not take affect for iWARP until qedr is reloaded\n",
+ dev->iwarp_max_mtu, dev->ndev->mtu);
+ break;
default:
pr_err("Event not supported\n");
}
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index 460292179b32..9dde70373a55 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -310,6 +310,11 @@ struct qedr_pd {
struct qedr_ucontext *uctx;
};
+struct qedr_xrcd {
+ struct ib_xrcd ibxrcd;
+ u16 xrcd_id;
+};
+
struct qedr_qp_hwq_info {
/* WQE Elements */
struct qed_chain pbl;
@@ -361,6 +366,7 @@ struct qedr_srq {
struct ib_umem *prod_umem;
u16 srq_id;
u32 srq_limit;
+ bool is_xrc;
/* lock to protect srq recv post */
spinlock_t lock;
};
@@ -573,6 +579,11 @@ static inline struct qedr_pd *get_qedr_pd(struct ib_pd *ibpd)
return container_of(ibpd, struct qedr_pd, ibpd);
}
+static inline struct qedr_xrcd *get_qedr_xrcd(struct ib_xrcd *ibxrcd)
+{
+ return container_of(ibxrcd, struct qedr_xrcd, ibxrcd);
+}
+
static inline struct qedr_cq *get_qedr_cq(struct ib_cq *ibcq)
{
return container_of(ibcq, struct qedr_cq, ibcq);
@@ -598,6 +609,28 @@ static inline struct qedr_srq *get_qedr_srq(struct ib_srq *ibsrq)
return container_of(ibsrq, struct qedr_srq, ibsrq);
}
+static inline bool qedr_qp_has_srq(struct qedr_qp *qp)
+{
+ return qp->srq;
+}
+
+static inline bool qedr_qp_has_sq(struct qedr_qp *qp)
+{
+ if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_XRC_TGT)
+ return 0;
+
+ return 1;
+}
+
+static inline bool qedr_qp_has_rq(struct qedr_qp *qp)
+{
+ if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_XRC_INI ||
+ qp->qp_type == IB_QPT_XRC_TGT || qedr_qp_has_srq(qp))
+ return 0;
+
+ return 1;
+}
+
static inline struct qedr_user_mmap_entry *
get_qedr_mmap_entry(struct rdma_user_mmap_entry *rdma_entry)
{
diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c
index 97fc7dd353b0..c7169d2c69e5 100644
--- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c
@@ -736,7 +736,7 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct qedr_dev *dev = ep->dev;
struct qedr_qp *qp;
struct qed_iwarp_accept_in params;
- int rc = 0;
+ int rc;
DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn);
@@ -759,8 +759,10 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
params.ord = conn_param->ord;
if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
- &qp->iwarp_cm_flags))
+ &qp->iwarp_cm_flags)) {
+ rc = -EINVAL;
goto err; /* QP already destroyed */
+ }
rc = dev->ops->iwarp_accept(dev->rdma_ctx, &params);
if (rc) {
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index b49bef94637e..019642ff24a7 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -136,6 +136,8 @@ int qedr_query_device(struct ib_device *ibdev,
IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
+ if (!rdma_protocol_iwarp(&dev->ibdev, 1))
+ attr->device_cap_flags |= IB_DEVICE_XRC;
attr->max_send_sge = qattr->max_sge;
attr->max_recv_sge = qattr->max_sge;
attr->max_sge_rd = qattr->max_sge;
@@ -157,13 +159,13 @@ int qedr_query_device(struct ib_device *ibdev,
attr->local_ca_ack_delay = qattr->dev_ack_delay;
attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
- attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
+ attr->max_pkeys = qattr->max_pkey;
attr->max_ah = qattr->max_ah;
return 0;
}
-static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
+static inline void get_link_speed_and_width(int speed, u16 *ib_speed,
u8 *ib_width)
{
switch (speed) {
@@ -231,15 +233,16 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
}
attr->max_mtu = IB_MTU_4096;
- attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
attr->lid = 0;
attr->lmc = 0;
attr->sm_lid = 0;
attr->sm_sl = 0;
attr->ip_gids = true;
if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
+ attr->active_mtu = iboe_get_mtu(dev->iwarp_max_mtu);
attr->gid_tbl_len = 1;
} else {
+ attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
attr->gid_tbl_len = QEDR_MAX_SGID;
attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
}
@@ -471,15 +474,33 @@ int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
return 0;
}
-void qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+int qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct qedr_dev *dev = get_qedr_dev(ibpd->device);
struct qedr_pd *pd = get_qedr_pd(ibpd);
DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
+ return 0;
+}
+
+
+int qedr_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
+{
+ struct qedr_dev *dev = get_qedr_dev(ibxrcd->device);
+ struct qedr_xrcd *xrcd = get_qedr_xrcd(ibxrcd);
+
+ return dev->ops->rdma_alloc_xrcd(dev->rdma_ctx, &xrcd->xrcd_id);
}
+int qedr_dealloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
+{
+ struct qedr_dev *dev = get_qedr_dev(ibxrcd->device);
+ u16 xrcd_id = get_qedr_xrcd(ibxrcd)->xrcd_id;
+
+ dev->ops->rdma_dealloc_xrcd(dev->rdma_ctx, xrcd_id);
+ return 0;
+}
static void qedr_free_pbl(struct qedr_dev *dev,
struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
{
@@ -600,11 +621,9 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
struct qedr_pbl_info *pbl_info, u32 pg_shift)
{
int pbe_cnt, total_num_pbes = 0;
- u32 fw_pg_cnt, fw_pg_per_umem_pg;
struct qedr_pbl *pbl_tbl;
- struct sg_dma_page_iter sg_iter;
+ struct ib_block_iter biter;
struct regpair *pbe;
- u64 pg_addr;
if (!pbl_info->num_pbes)
return;
@@ -625,32 +644,25 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
pbe_cnt = 0;
- fw_pg_per_umem_pg = BIT(PAGE_SHIFT - pg_shift);
+ rdma_umem_for_each_dma_block (umem, &biter, BIT(pg_shift)) {
+ u64 pg_addr = rdma_block_iter_dma_address(&biter);
- for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
- pg_addr = sg_page_iter_dma_address(&sg_iter);
- for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
- pbe->lo = cpu_to_le32(pg_addr);
- pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
+ pbe->lo = cpu_to_le32(pg_addr);
+ pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
- pg_addr += BIT(pg_shift);
- pbe_cnt++;
- total_num_pbes++;
- pbe++;
+ pbe_cnt++;
+ total_num_pbes++;
+ pbe++;
- if (total_num_pbes == pbl_info->num_pbes)
- return;
+ if (total_num_pbes == pbl_info->num_pbes)
+ return;
- /* If the given pbl is full storing the pbes,
- * move to next pbl.
- */
- if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
- pbl_tbl++;
- pbe = (struct regpair *)pbl_tbl->va;
- pbe_cnt = 0;
- }
-
- fw_pg_cnt++;
+ /* If the given pbl is full storing the pbes, move to next pbl.
+ */
+ if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
+ pbl_tbl++;
+ pbe = (struct regpair *)pbl_tbl->va;
+ pbe_cnt = 0;
}
}
}
@@ -792,9 +804,7 @@ static inline int qedr_init_user_queue(struct ib_udata *udata,
return PTR_ERR(q->umem);
}
- fw_pages = ib_umem_page_count(q->umem) <<
- (PAGE_SHIFT - FW_PAGE_SHIFT);
-
+ fw_pages = ib_umem_num_dma_blocks(q->umem, 1 << FW_PAGE_SHIFT);
rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
if (rc)
goto err0;
@@ -999,7 +1009,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
/* Generate doorbell address. */
cq->db.data.icid = cq->icid;
cq->db_addr = dev->db_addr + db_offset;
- cq->db.data.params = DB_AGG_CMD_SET <<
+ cq->db.data.params = DB_AGG_CMD_MAX <<
RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
/* point to the very last element, passing it we will toggle */
@@ -1051,7 +1061,7 @@ int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
#define QEDR_DESTROY_CQ_MAX_ITERATIONS (10)
#define QEDR_DESTROY_CQ_ITER_DURATION (10)
-void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct qedr_dev *dev = get_qedr_dev(ibcq->device);
struct qed_rdma_destroy_cq_out_params oparams;
@@ -1066,7 +1076,7 @@ void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
/* GSIs CQs are handled by driver, so they don't exist in the FW */
if (cq->cq_type == QEDR_CQ_TYPE_GSI) {
qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
- return;
+ return 0;
}
iparams.icid = cq->icid;
@@ -1114,6 +1124,7 @@ void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
* Since the destroy CQ ramrod has also been received on the EQ we can
* be certain that there's no event handler in process.
*/
+ return 0;
}
static inline int get_gid_info_from_table(struct ib_qp *ibqp,
@@ -1146,7 +1157,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
SET_FIELD(qp_params->modify_flags,
QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
break;
- case RDMA_NETWORK_IB:
+ case RDMA_NETWORK_ROCE_V1:
memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
sizeof(qp_params->sgid));
memcpy(&qp_params->dgid.bytes[0],
@@ -1166,6 +1177,8 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
qp_params->roce_mode = ROCE_V2_IPV4;
break;
+ default:
+ return -EINVAL;
}
for (i = 0; i < 4; i++) {
@@ -1186,7 +1199,10 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
struct qedr_device_attr *qattr = &dev->attr;
/* QP0... attrs->qp_type == IB_QPT_GSI */
- if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
+ if (attrs->qp_type != IB_QPT_RC &&
+ attrs->qp_type != IB_QPT_GSI &&
+ attrs->qp_type != IB_QPT_XRC_INI &&
+ attrs->qp_type != IB_QPT_XRC_TGT) {
DP_DEBUG(dev, QEDR_MSG_QP,
"create qp: unsupported qp type=0x%x requested\n",
attrs->qp_type);
@@ -1221,12 +1237,20 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
return -EINVAL;
}
- /* Unprivileged user space cannot create special QP */
- if (udata && attrs->qp_type == IB_QPT_GSI) {
- DP_ERR(dev,
- "create qp: userspace can't create special QPs of type=0x%x\n",
- attrs->qp_type);
- return -EINVAL;
+ /* verify consumer QPs are not trying to use GSI QP's CQ.
+ * TGT QP isn't associated with RQ/SQ
+ */
+ if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created) &&
+ (attrs->qp_type != IB_QPT_XRC_TGT)) {
+ struct qedr_cq *send_cq = get_qedr_cq(attrs->send_cq);
+ struct qedr_cq *recv_cq = get_qedr_cq(attrs->recv_cq);
+
+ if ((send_cq->cq_type == QEDR_CQ_TYPE_GSI) ||
+ (recv_cq->cq_type == QEDR_CQ_TYPE_GSI)) {
+ DP_ERR(dev,
+ "create qp: consumer QP cannot use GSI CQs.\n");
+ return -EINVAL;
+ }
}
return 0;
@@ -1248,8 +1272,8 @@ static int qedr_copy_srq_uresp(struct qedr_dev *dev,
}
static void qedr_copy_rq_uresp(struct qedr_dev *dev,
- struct qedr_create_qp_uresp *uresp,
- struct qedr_qp *qp)
+ struct qedr_create_qp_uresp *uresp,
+ struct qedr_qp *qp)
{
/* iWARP requires two doorbells per RQ. */
if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
@@ -1291,8 +1315,12 @@ static int qedr_copy_qp_uresp(struct qedr_dev *dev,
int rc;
memset(uresp, 0, sizeof(*uresp));
- qedr_copy_sq_uresp(dev, uresp, qp);
- qedr_copy_rq_uresp(dev, uresp, qp);
+
+ if (qedr_qp_has_sq(qp))
+ qedr_copy_sq_uresp(dev, uresp, qp);
+
+ if (qedr_qp_has_rq(qp))
+ qedr_copy_rq_uresp(dev, uresp, qp);
uresp->atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
uresp->qp_id = qp->qp_id;
@@ -1316,18 +1344,25 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev,
kref_init(&qp->refcnt);
init_completion(&qp->iwarp_cm_comp);
}
+
qp->pd = pd;
qp->qp_type = attrs->qp_type;
qp->max_inline_data = attrs->cap.max_inline_data;
- qp->sq.max_sges = attrs->cap.max_send_sge;
qp->state = QED_ROCE_QP_STATE_RESET;
qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
- qp->sq_cq = get_qedr_cq(attrs->send_cq);
qp->dev = dev;
+ if (qedr_qp_has_sq(qp)) {
+ qp->sq.max_sges = attrs->cap.max_send_sge;
+ qp->sq_cq = get_qedr_cq(attrs->send_cq);
+ DP_DEBUG(dev, QEDR_MSG_QP,
+ "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
+ qp->sq.max_sges, qp->sq_cq->icid);
+ }
- if (attrs->srq) {
+ if (attrs->srq)
qp->srq = get_qedr_srq(attrs->srq);
- } else {
+
+ if (qedr_qp_has_rq(qp)) {
qp->rq_cq = get_qedr_cq(attrs->recv_cq);
qp->rq.max_sges = attrs->cap.max_recv_sge;
DP_DEBUG(dev, QEDR_MSG_QP,
@@ -1346,30 +1381,26 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev,
static int qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
{
- int rc;
+ int rc = 0;
- qp->sq.db = dev->db_addr +
- DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
- qp->sq.db_data.data.icid = qp->icid + 1;
- rc = qedr_db_recovery_add(dev, qp->sq.db,
- &qp->sq.db_data,
- DB_REC_WIDTH_32B,
- DB_REC_KERNEL);
- if (rc)
- return rc;
+ if (qedr_qp_has_sq(qp)) {
+ qp->sq.db = dev->db_addr +
+ DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
+ qp->sq.db_data.data.icid = qp->icid + 1;
+ rc = qedr_db_recovery_add(dev, qp->sq.db, &qp->sq.db_data,
+ DB_REC_WIDTH_32B, DB_REC_KERNEL);
+ if (rc)
+ return rc;
+ }
- if (!qp->srq) {
+ if (qedr_qp_has_rq(qp)) {
qp->rq.db = dev->db_addr +
DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
qp->rq.db_data.data.icid = qp->icid;
-
- rc = qedr_db_recovery_add(dev, qp->rq.db,
- &qp->rq.db_data,
- DB_REC_WIDTH_32B,
- DB_REC_KERNEL);
- if (rc)
- qedr_db_recovery_del(dev, qp->sq.db,
- &qp->sq.db_data);
+ rc = qedr_db_recovery_add(dev, qp->rq.db, &qp->rq.db_data,
+ DB_REC_WIDTH_32B, DB_REC_KERNEL);
+ if (rc && qedr_qp_has_sq(qp))
+ qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data);
}
return rc;
@@ -1392,6 +1423,10 @@ static int qedr_check_srq_params(struct qedr_dev *dev,
DP_ERR(dev,
"create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n",
attrs->attr.max_sge, qattr->max_sge);
+ }
+
+ if (!udata && attrs->srq_type == IB_SRQT_XRC) {
+ DP_ERR(dev, "XRC SRQs are not supported in kernel-space\n");
return -EINVAL;
}
@@ -1516,6 +1551,7 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
return -EINVAL;
srq->dev = dev;
+ srq->is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
hw_srq = &srq->hw_srq;
spin_lock_init(&srq->lock);
@@ -1557,6 +1593,14 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
in_params.prod_pair_addr = phy_prod_pair_addr;
in_params.num_pages = page_cnt;
in_params.page_size = page_size;
+ if (srq->is_xrc) {
+ struct qedr_xrcd *xrcd = get_qedr_xrcd(init_attr->ext.xrc.xrcd);
+ struct qedr_cq *cq = get_qedr_cq(init_attr->ext.cq);
+
+ in_params.is_xrc = 1;
+ in_params.xrcd_id = xrcd->xrcd_id;
+ in_params.cq_cid = cq->icid;
+ }
rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params);
if (rc)
@@ -1591,7 +1635,7 @@ err0:
return -EFAULT;
}
-void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+int qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
struct qed_rdma_destroy_srq_in_params in_params = {};
struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
@@ -1599,6 +1643,7 @@ void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
xa_erase_irq(&dev->srqs, srq->srq_id);
in_params.srq_id = srq->srq_id;
+ in_params.is_xrc = srq->is_xrc;
dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params);
if (ibsrq->uobject)
@@ -1609,6 +1654,7 @@ void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
DP_DEBUG(dev, QEDR_MSG_SRQ,
"destroy srq: destroyed srq with srq_id=0x%0x\n",
srq->srq_id);
+ return 0;
}
int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
@@ -1649,6 +1695,20 @@ int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
return 0;
}
+static enum qed_rdma_qp_type qedr_ib_to_qed_qp_type(enum ib_qp_type ib_qp_type)
+{
+ switch (ib_qp_type) {
+ case IB_QPT_RC:
+ return QED_RDMA_QP_TYPE_RC;
+ case IB_QPT_XRC_INI:
+ return QED_RDMA_QP_TYPE_XRC_INI;
+ case IB_QPT_XRC_TGT:
+ return QED_RDMA_QP_TYPE_XRC_TGT;
+ default:
+ return QED_RDMA_QP_TYPE_INVAL;
+ }
+}
+
static inline void
qedr_init_common_qp_in_params(struct qedr_dev *dev,
struct qedr_pd *pd,
@@ -1663,20 +1723,27 @@ qedr_init_common_qp_in_params(struct qedr_dev *dev,
params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
- params->pd = pd->pd_id;
- params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
- params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
+ params->qp_type = qedr_ib_to_qed_qp_type(attrs->qp_type);
params->stats_queue = 0;
- params->srq_id = 0;
- params->use_srq = false;
- if (!qp->srq) {
+ if (pd) {
+ params->pd = pd->pd_id;
+ params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
+ }
+
+ if (qedr_qp_has_sq(qp))
+ params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
+
+ if (qedr_qp_has_rq(qp))
params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
- } else {
+ if (qedr_qp_has_srq(qp)) {
params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
params->srq_id = qp->srq->srq_id;
params->use_srq = true;
+ } else {
+ params->srq_id = 0;
+ params->use_srq = false;
}
}
@@ -1690,8 +1757,10 @@ static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
"rq_len=%zd"
"\n",
qp,
- qp->usq.buf_addr,
- qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
+ qedr_qp_has_sq(qp) ? qp->usq.buf_addr : 0x0,
+ qedr_qp_has_sq(qp) ? qp->usq.buf_len : 0,
+ qedr_qp_has_rq(qp) ? qp->urq.buf_addr : 0x0,
+ qedr_qp_has_sq(qp) ? qp->urq.buf_len : 0);
}
static inline void
@@ -1717,11 +1786,15 @@ static void qedr_cleanup_user(struct qedr_dev *dev,
struct qedr_ucontext *ctx,
struct qedr_qp *qp)
{
- ib_umem_release(qp->usq.umem);
- qp->usq.umem = NULL;
+ if (qedr_qp_has_sq(qp)) {
+ ib_umem_release(qp->usq.umem);
+ qp->usq.umem = NULL;
+ }
- ib_umem_release(qp->urq.umem);
- qp->urq.umem = NULL;
+ if (qedr_qp_has_rq(qp)) {
+ ib_umem_release(qp->urq.umem);
+ qp->urq.umem = NULL;
+ }
if (rdma_protocol_roce(&dev->ibdev, 1)) {
qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl);
@@ -1756,28 +1829,38 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
{
struct qed_rdma_create_qp_in_params in_params;
struct qed_rdma_create_qp_out_params out_params;
- struct qedr_pd *pd = get_qedr_pd(ibpd);
- struct qedr_create_qp_uresp uresp;
- struct qedr_ucontext *ctx = pd ? pd->uctx : NULL;
- struct qedr_create_qp_ureq ureq;
+ struct qedr_create_qp_uresp uresp = {};
+ struct qedr_create_qp_ureq ureq = {};
int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
- int rc = -EINVAL;
+ struct qedr_ucontext *ctx = NULL;
+ struct qedr_pd *pd = NULL;
+ int rc = 0;
qp->create_type = QEDR_QP_CREATE_USER;
- memset(&ureq, 0, sizeof(ureq));
- rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen));
- if (rc) {
- DP_ERR(dev, "Problem copying data from user space\n");
- return rc;
+
+ if (ibpd) {
+ pd = get_qedr_pd(ibpd);
+ ctx = pd->uctx;
}
- /* SQ - read access only (0) */
- rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
- ureq.sq_len, true, 0, alloc_and_init);
- if (rc)
- return rc;
+ if (udata) {
+ rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
+ udata->inlen));
+ if (rc) {
+ DP_ERR(dev, "Problem copying data from user space\n");
+ return rc;
+ }
+ }
- if (!qp->srq) {
+ if (qedr_qp_has_sq(qp)) {
+ /* SQ - read access only (0) */
+ rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
+ ureq.sq_len, true, 0, alloc_and_init);
+ if (rc)
+ return rc;
+ }
+
+ if (qedr_qp_has_rq(qp)) {
/* RQ - read access only (0) */
rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
ureq.rq_len, true, 0, alloc_and_init);
@@ -1789,9 +1872,21 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
in_params.qp_handle_lo = ureq.qp_handle_lo;
in_params.qp_handle_hi = ureq.qp_handle_hi;
- in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
- in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
- if (!qp->srq) {
+
+ if (qp->qp_type == IB_QPT_XRC_TGT) {
+ struct qedr_xrcd *xrcd = get_qedr_xrcd(attrs->xrcd);
+
+ in_params.xrcd_id = xrcd->xrcd_id;
+ in_params.qp_handle_lo = qp->qp_id;
+ in_params.use_srq = 1;
+ }
+
+ if (qedr_qp_has_sq(qp)) {
+ in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
+ in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
+ }
+
+ if (qedr_qp_has_rq(qp)) {
in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
}
@@ -1813,39 +1908,32 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
qp->qp_id = out_params.qp_id;
qp->icid = out_params.icid;
- rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp);
- if (rc)
- goto err;
+ if (udata) {
+ rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp);
+ if (rc)
+ goto err;
+ }
/* db offset was calculated in copy_qp_uresp, now set in the user q */
- ctx = pd->uctx;
- qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset;
- qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset;
-
- if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
- qp->urq.db_rec_db2_addr = ctx->dpi_addr + uresp.rq_db2_offset;
-
- /* calculate the db_rec_db2 data since it is constant so no
- * need to reflect from user
- */
- qp->urq.db_rec_db2_data.data.icid = cpu_to_le16(qp->icid);
- qp->urq.db_rec_db2_data.data.value =
- cpu_to_le16(DQ_TCM_IWARP_POST_RQ_CF_CMD);
+ if (qedr_qp_has_sq(qp)) {
+ qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset;
+ rc = qedr_db_recovery_add(dev, qp->usq.db_addr,
+ &qp->usq.db_rec_data->db_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_USER);
+ if (rc)
+ goto err;
}
- rc = qedr_db_recovery_add(dev, qp->usq.db_addr,
- &qp->usq.db_rec_data->db_data,
- DB_REC_WIDTH_32B,
- DB_REC_USER);
- if (rc)
- goto err;
-
- rc = qedr_db_recovery_add(dev, qp->urq.db_addr,
- &qp->urq.db_rec_data->db_data,
- DB_REC_WIDTH_32B,
- DB_REC_USER);
- if (rc)
- goto err;
+ if (qedr_qp_has_rq(qp)) {
+ qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset;
+ rc = qedr_db_recovery_add(dev, qp->urq.db_addr,
+ &qp->urq.db_rec_data->db_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_USER);
+ if (rc)
+ goto err;
+ }
if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
rc = qedr_db_recovery_add(dev, qp->urq.db_rec_db2_addr,
@@ -1856,7 +1944,6 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
goto err;
}
qedr_qp_user_print(dev, qp);
-
return rc;
err:
rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
@@ -2112,16 +2199,47 @@ static int qedr_create_kernel_qp(struct qedr_dev *dev,
return rc;
}
+static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
+ struct ib_udata *udata)
+{
+ struct qedr_ucontext *ctx =
+ rdma_udata_to_drv_context(udata, struct qedr_ucontext,
+ ibucontext);
+ int rc;
+
+ if (qp->qp_type != IB_QPT_GSI) {
+ rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
+ if (rc)
+ return rc;
+ }
+
+ if (qp->create_type == QEDR_QP_CREATE_USER)
+ qedr_cleanup_user(dev, ctx, qp);
+ else
+ qedr_cleanup_kernel(dev, qp);
+
+ return 0;
+}
+
struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
struct ib_qp_init_attr *attrs,
struct ib_udata *udata)
{
- struct qedr_dev *dev = get_qedr_dev(ibpd->device);
- struct qedr_pd *pd = get_qedr_pd(ibpd);
+ struct qedr_xrcd *xrcd = NULL;
+ struct qedr_pd *pd = NULL;
+ struct qedr_dev *dev;
struct qedr_qp *qp;
struct ib_qp *ibqp;
int rc = 0;
+ if (attrs->qp_type == IB_QPT_XRC_TGT) {
+ xrcd = get_qedr_xrcd(attrs->xrcd);
+ dev = get_qedr_dev(xrcd->ibxrcd.device);
+ } else {
+ pd = get_qedr_pd(ibpd);
+ dev = get_qedr_dev(ibpd->device);
+ }
+
DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
udata ? "user library" : "kernel", pd);
@@ -2152,25 +2270,27 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
return ibqp;
}
- if (udata)
+ if (udata || xrcd)
rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
else
rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
if (rc)
- goto err;
+ goto out_free_qp;
qp->ibqp.qp_num = qp->qp_id;
if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL);
if (rc)
- goto err;
+ goto out_free_qp_resources;
}
return &qp->ibqp;
-err:
+out_free_qp_resources:
+ qedr_free_qp_resources(dev, qp, udata);
+out_free_qp:
kfree(qp);
return ERR_PTR(-EFAULT);
@@ -2636,7 +2756,7 @@ int qedr_query_qp(struct ib_qp *ibqp,
qp_attr->cap.max_recv_wr = qp->rq.max_wr;
qp_attr->cap.max_send_sge = qp->sq.max_sges;
qp_attr->cap.max_recv_sge = qp->rq.max_sges;
- qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
+ qp_attr->cap.max_inline_data = dev->attr.max_inline;
qp_init_attr->cap = qp_attr->cap;
qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
@@ -2671,28 +2791,6 @@ err:
return rc;
}
-static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
- struct ib_udata *udata)
-{
- struct qedr_ucontext *ctx =
- rdma_udata_to_drv_context(udata, struct qedr_ucontext,
- ibucontext);
- int rc;
-
- if (qp->qp_type != IB_QPT_GSI) {
- rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
- if (rc)
- return rc;
- }
-
- if (qp->create_type == QEDR_QP_CREATE_USER)
- qedr_cleanup_user(dev, ctx, qp);
- else
- qedr_cleanup_kernel(dev, qp);
-
- return 0;
-}
-
int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
struct qedr_qp *qp = get_qedr_qp(ibqp);
@@ -2752,6 +2850,8 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
if (rdma_protocol_iwarp(&dev->ibdev, 1))
qedr_iw_qp_rem_ref(&qp->ibqp);
+ else
+ kfree(qp);
return 0;
}
@@ -2766,11 +2866,12 @@ int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
return 0;
}
-void qedr_destroy_ah(struct ib_ah *ibah, u32 flags)
+int qedr_destroy_ah(struct ib_ah *ibah, u32 flags)
{
struct qedr_ah *ah = get_qedr_ah(ibah);
rdma_destroy_ah_attr(&ah->attr);
+ return 0;
}
static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
@@ -2861,7 +2962,8 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
goto err0;
}
- rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
+ rc = init_mr_info(dev, &mr->info,
+ ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE), 1);
if (rc)
goto err1;
@@ -2888,10 +2990,8 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
mr->hw_mr.page_size_log = PAGE_SHIFT;
- mr->hw_mr.fbo = ib_umem_offset(mr->umem);
mr->hw_mr.length = len;
mr->hw_mr.vaddr = usr_addr;
- mr->hw_mr.zbva = false;
mr->hw_mr.phy_mr = false;
mr->hw_mr.dma_mr = false;
@@ -2984,10 +3084,8 @@ static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
mr->hw_mr.pbl_ptr = 0;
mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
- mr->hw_mr.fbo = 0;
mr->hw_mr.length = 0;
mr->hw_mr.vaddr = 0;
- mr->hw_mr.zbva = false;
mr->hw_mr.phy_mr = true;
mr->hw_mr.dma_mr = false;
@@ -3765,10 +3863,10 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
* in first 4 bytes and need to update WQE producer in
* next 4 bytes.
*/
- srq->hw_srq.virt_prod_pair_addr->sge_prod = hw_srq->sge_prod;
+ srq->hw_srq.virt_prod_pair_addr->sge_prod = cpu_to_le32(hw_srq->sge_prod);
/* Make sure sge producer is updated first */
dma_wmb();
- srq->hw_srq.virt_prod_pair_addr->wqe_prod = hw_srq->wqe_prod;
+ srq->hw_srq.virt_prod_pair_addr->wqe_prod = cpu_to_le32(hw_srq->wqe_prod);
wr = wr->next;
}
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 39dd6286ba39..2672c32bc2f7 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -47,12 +47,13 @@ void qedr_dealloc_ucontext(struct ib_ucontext *uctx);
int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma);
void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
int qedr_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-void qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-
+int qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
+int qedr_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata);
+int qedr_dealloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata);
int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
int qedr_resize_cq(struct ib_cq *, int cqe, struct ib_udata *);
-void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
struct ib_qp *qedr_create_qp(struct ib_pd *, struct ib_qp_init_attr *attrs,
struct ib_udata *);
@@ -67,12 +68,12 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *attr,
int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
-void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
+int qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_recv_wr);
int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
-void qedr_destroy_ah(struct ib_ah *ibah, u32 flags);
+int qedr_destroy_ah(struct ib_ah *ibah, u32 flags);
int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata);
struct ib_mr *qedr_get_dma_mr(struct ib_pd *, int acc);
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 432d6d0fd7f4..ee211423058a 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -619,11 +619,11 @@ struct qib_pportdata {
/* LID mask control */
u8 lmc;
u8 link_width_supported;
- u8 link_speed_supported;
+ u16 link_speed_supported;
u8 link_width_enabled;
- u8 link_speed_enabled;
+ u16 link_speed_enabled;
u8 link_width_active;
- u8 link_speed_active;
+ u16 link_speed_active;
u8 vls_supported;
u8 vls_operational;
/* Rx Polarity inversion (compensate for ~tx on partner) */
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index a10eab89aee4..189a0ce6056a 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -1733,9 +1733,9 @@ done:
return;
}
-static void qib_error_tasklet(unsigned long data)
+static void qib_error_tasklet(struct tasklet_struct *t)
{
- struct qib_devdata *dd = (struct qib_devdata *)data;
+ struct qib_devdata *dd = from_tasklet(dd, t, error_tasklet);
handle_7322_errors(dd);
qib_write_kreg(dd, kr_errmask, dd->cspec->errormask);
@@ -3537,8 +3537,7 @@ try_intx:
for (i = 0; i < ARRAY_SIZE(redirect); i++)
qib_write_kreg(dd, kr_intredirect + i, redirect[i]);
dd->cspec->main_int_mask = mask;
- tasklet_init(&dd->error_tasklet, qib_error_tasklet,
- (unsigned long)dd);
+ tasklet_setup(&dd->error_tasklet, qib_error_tasklet);
}
/**
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index e7789e724f56..f83e331977f8 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -2293,76 +2293,50 @@ static int process_cc(struct ib_device *ibdev, int mad_flags,
struct ib_mad *out_mad)
{
struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad;
- int ret;
-
*out_mad = *in_mad;
if (ccp->class_version != 2) {
ccp->status |= IB_SMP_UNSUP_VERSION;
- ret = reply((struct ib_smp *)ccp);
- goto bail;
+ return reply((struct ib_smp *)ccp);
}
switch (ccp->method) {
case IB_MGMT_METHOD_GET:
switch (ccp->attr_id) {
case IB_CC_ATTR_CLASSPORTINFO:
- ret = cc_get_classportinfo(ccp, ibdev);
- goto bail;
-
+ return cc_get_classportinfo(ccp, ibdev);
case IB_CC_ATTR_CONGESTION_INFO:
- ret = cc_get_congestion_info(ccp, ibdev, port);
- goto bail;
-
+ return cc_get_congestion_info(ccp, ibdev, port);
case IB_CC_ATTR_CA_CONGESTION_SETTING:
- ret = cc_get_congestion_setting(ccp, ibdev, port);
- goto bail;
-
+ return cc_get_congestion_setting(ccp, ibdev, port);
case IB_CC_ATTR_CONGESTION_CONTROL_TABLE:
- ret = cc_get_congestion_control_table(ccp, ibdev, port);
- goto bail;
-
- fallthrough;
+ return cc_get_congestion_control_table(ccp, ibdev, port);
default:
ccp->status |= IB_SMP_UNSUP_METH_ATTR;
- ret = reply((struct ib_smp *) ccp);
- goto bail;
+ return reply((struct ib_smp *) ccp);
}
-
case IB_MGMT_METHOD_SET:
switch (ccp->attr_id) {
case IB_CC_ATTR_CA_CONGESTION_SETTING:
- ret = cc_set_congestion_setting(ccp, ibdev, port);
- goto bail;
-
+ return cc_set_congestion_setting(ccp, ibdev, port);
case IB_CC_ATTR_CONGESTION_CONTROL_TABLE:
- ret = cc_set_congestion_control_table(ccp, ibdev, port);
- goto bail;
-
- fallthrough;
+ return cc_set_congestion_control_table(ccp, ibdev, port);
default:
ccp->status |= IB_SMP_UNSUP_METH_ATTR;
- ret = reply((struct ib_smp *) ccp);
- goto bail;
+ return reply((struct ib_smp *) ccp);
}
-
case IB_MGMT_METHOD_GET_RESP:
/*
* The ib_mad module will call us to process responses
* before checking for other consumers.
* Just tell the caller to process it normally.
*/
- ret = IB_MAD_RESULT_SUCCESS;
- goto bail;
-
- case IB_MGMT_METHOD_TRAP:
- default:
- ccp->status |= IB_SMP_UNSUP_METHOD;
- ret = reply((struct ib_smp *) ccp);
+ return IB_MAD_RESULT_SUCCESS;
}
-bail:
- return ret;
+ /* method is unsupported */
+ ccp->status |= IB_SMP_UNSUP_METHOD;
+ return reply((struct ib_smp *) ccp);
}
/**
diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
index 8f8d61736656..5e86cbf7d70e 100644
--- a/drivers/infiniband/hw/qib/qib_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -62,7 +62,7 @@ static void sdma_get(struct qib_sdma_state *);
static void sdma_put(struct qib_sdma_state *);
static void sdma_set_state(struct qib_pportdata *, enum qib_sdma_states);
static void sdma_start_sw_clean_up(struct qib_pportdata *);
-static void sdma_sw_clean_up_task(unsigned long);
+static void sdma_sw_clean_up_task(struct tasklet_struct *);
static void unmap_desc(struct qib_pportdata *, unsigned);
static void sdma_get(struct qib_sdma_state *ss)
@@ -119,9 +119,10 @@ static void clear_sdma_activelist(struct qib_pportdata *ppd)
}
}
-static void sdma_sw_clean_up_task(unsigned long opaque)
+static void sdma_sw_clean_up_task(struct tasklet_struct *t)
{
- struct qib_pportdata *ppd = (struct qib_pportdata *) opaque;
+ struct qib_pportdata *ppd = from_tasklet(ppd, t,
+ sdma_sw_clean_up_task);
unsigned long flags;
spin_lock_irqsave(&ppd->sdma_lock, flags);
@@ -436,8 +437,7 @@ int qib_setup_sdma(struct qib_pportdata *ppd)
INIT_LIST_HEAD(&ppd->sdma_activelist);
- tasklet_init(&ppd->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
- (unsigned long)ppd);
+ tasklet_setup(&ppd->sdma_sw_clean_up_task, sdma_sw_clean_up_task);
ret = dd->f_init_sdma_regs(ppd);
if (ret)
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index 662e7fc7f628..aa2e65fc5cd6 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -315,7 +315,6 @@ static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num,
if (err)
return err;
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
return 0;
@@ -355,7 +354,6 @@ static const struct ib_device_ops usnic_dev_ops = {
.modify_qp = usnic_ib_modify_qp,
.query_device = usnic_ib_query_device,
.query_gid = usnic_ib_query_gid,
- .query_pkey = usnic_ib_query_pkey,
.query_port = usnic_ib_query_port,
.query_qp = usnic_ib_query_qp,
.reg_user_mr = usnic_ib_reg_mr,
@@ -427,7 +425,8 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
if (ret)
goto err_fwd_dealloc;
- if (ib_register_device(&us_ibdev->ib_dev, "usnic_%d"))
+ dma_set_max_seg_size(&dev->dev, SZ_2G);
+ if (ib_register_device(&us_ibdev->ib_dev, "usnic_%d", &dev->dev))
goto err_fwd_dealloc;
usnic_fwd_set_mtu(us_ibdev->ufdev, us_ibdev->netdev->mtu);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index b8a77ce11590..9e961f8ffa10 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -367,7 +367,6 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
props->port_cap_flags = 0;
props->gid_tbl_len = 1;
- props->pkey_tbl_len = 1;
props->bad_pkey_cntr = 0;
props->qkey_viol_cntr = 0;
props->max_mtu = IB_MTU_4096;
@@ -437,16 +436,6 @@ int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
return 0;
}
-int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
- u16 *pkey)
-{
- if (index > 0)
- return -EINVAL;
-
- *pkey = 0xffff;
- return 0;
-}
-
int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct usnic_ib_pd *pd = to_upd(ibpd);
@@ -460,9 +449,10 @@ int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
return 0;
}
-void usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+int usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
usnic_uiom_dealloc_pd((to_upd(pd))->umem_pd);
+ return 0;
}
struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
@@ -596,9 +586,9 @@ int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
return 0;
}
-void usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
- return;
+ return 0;
}
struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
index 2aedf78c13cf..11fe1ba6bbc9 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -48,10 +48,8 @@ int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
struct ib_qp_init_attr *qp_init_attr);
int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid);
-int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
- u16 *pkey);
int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
-void usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
+int usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata);
@@ -60,7 +58,7 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
-void usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
+int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
index 4f6cc0de7ef9..319546a39a0d 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
@@ -142,7 +142,7 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
goto err_cq;
}
- npages = ib_umem_page_count(cq->umem);
+ npages = ib_umem_num_dma_blocks(cq->umem, PAGE_SIZE);
} else {
/* One extra page for shared ring state */
npages = 1 + (entries * sizeof(struct pvrdma_cqe) +
@@ -235,7 +235,7 @@ static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq)
* @cq: the completion queue to destroy.
* @udata: user data or null for kernel object
*/
-void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
struct pvrdma_cq *vcq = to_vcq(cq);
union pvrdma_cmd_req req;
@@ -261,6 +261,7 @@ void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
pvrdma_free_cq(dev, vcq);
atomic_dec(&dev->num_cqs);
+ return 0;
}
static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i)
@@ -375,7 +376,7 @@ retry:
* pvrdma_poll_cq - poll for work completion queue entries
* @ibcq: completion queue
* @num_entries: the maximum number of entries
- * @entry: pointer to work completion array
+ * @wc: pointer to work completion array
*
* @return: number of polled completion entries
*/
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 780fd2dfc07e..fa2a3fa0c3e4 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -270,7 +270,7 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
spin_lock_init(&dev->srq_tbl_lock);
rdma_set_device_sysfs_group(&dev->ib_dev, &pvrdma_attr_group);
- ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d");
+ ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d", &dev->pdev->dev);
if (ret)
goto err_srq_free;
@@ -854,7 +854,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
goto err_free_resource;
}
}
-
+ dma_set_max_seg_size(&pdev->dev, UINT_MAX);
pci_set_master(pdev);
/* Map register space */
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c
index 7944c58ded0e..ba43ad07898c 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c
@@ -182,17 +182,16 @@ int pvrdma_page_dir_insert_dma(struct pvrdma_page_dir *pdir, u64 idx,
int pvrdma_page_dir_insert_umem(struct pvrdma_page_dir *pdir,
struct ib_umem *umem, u64 offset)
{
+ struct ib_block_iter biter;
u64 i = offset;
int ret = 0;
- struct sg_dma_page_iter sg_iter;
if (offset >= pdir->npages)
return -EINVAL;
- for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
- dma_addr_t addr = sg_page_iter_dma_address(&sg_iter);
-
- ret = pvrdma_page_dir_insert_dma(pdir, i, addr);
+ rdma_umem_for_each_dma_block (umem, &biter, PAGE_SIZE) {
+ ret = pvrdma_page_dir_insert_dma(
+ pdir, i, rdma_block_iter_dma_address(&biter));
if (ret)
goto exit;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
index 77a010e68208..e80848bfb3bd 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
@@ -133,7 +133,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return ERR_CAST(umem);
}
- npages = ib_umem_num_pages(umem);
+ npages = ib_umem_num_dma_blocks(umem, PAGE_SIZE);
if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
dev_warn(&dev->pdev->dev, "overflow %d pages in mem region\n",
npages);
@@ -270,6 +270,7 @@ freemr:
/**
* pvrdma_dereg_mr - deregister a memory region
* @ibmr: memory region
+ * @udata: pointer to user data
*
* @return: 0 on success.
*/
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index 9a8f2a9507be..428256c55065 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -232,8 +232,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
switch (init_attr->qp_type) {
case IB_QPT_GSI:
if (init_attr->port_num == 0 ||
- init_attr->port_num > pd->device->phys_port_cnt ||
- udata) {
+ init_attr->port_num > pd->device->phys_port_cnt) {
dev_warn(&dev->pdev->dev, "invalid queuepair attrs\n");
ret = -EINVAL;
goto err_qp;
@@ -298,9 +297,11 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
goto err_qp;
}
- qp->npages_send = ib_umem_page_count(qp->sumem);
+ qp->npages_send =
+ ib_umem_num_dma_blocks(qp->sumem, PAGE_SIZE);
if (!is_srq)
- qp->npages_recv = ib_umem_page_count(qp->rumem);
+ qp->npages_recv = ib_umem_num_dma_blocks(
+ qp->rumem, PAGE_SIZE);
else
qp->npages_recv = 0;
qp->npages = qp->npages_send + qp->npages_recv;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
index d330decfb80a..082208f9aa90 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
@@ -90,7 +90,7 @@ int pvrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
/**
* pvrdma_create_srq - create shared receive queue
- * @pd: protection domain
+ * @ibsrq: the IB shared receive queue
* @init_attr: shared receive queue attributes
* @udata: user data
*
@@ -152,7 +152,7 @@ int pvrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
goto err_srq;
}
- srq->npages = ib_umem_page_count(srq->umem);
+ srq->npages = ib_umem_num_dma_blocks(srq->umem, PAGE_SIZE);
if (srq->npages < 0 || srq->npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
dev_warn(&dev->pdev->dev,
@@ -240,7 +240,7 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq)
*
* @return: 0 for success.
*/
-void pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
+int pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
{
struct pvrdma_srq *vsrq = to_vsrq(srq);
union pvrdma_cmd_req req;
@@ -259,6 +259,7 @@ void pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
ret);
pvrdma_free_srq(dev, vsrq);
+ return 0;
}
/**
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
index ccbded2d26ce..fc412cbfd042 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
@@ -479,9 +479,9 @@ err:
* @pd: the protection domain to be released
* @udata: user data or null for kernel object
*
- * @return: 0 on success, otherwise errno.
+ * @return: Always 0
*/
-void pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+int pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
struct pvrdma_dev *dev = to_vdev(pd->device);
union pvrdma_cmd_req req = {};
@@ -498,14 +498,14 @@ void pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
ret);
atomic_dec(&dev->num_pds);
+ return 0;
}
/**
* pvrdma_create_ah - create an address handle
- * @pd: the protection domain
- * @ah_attr: the attributes of the AH
- * @udata: user data blob
- * @flags: create address handle flags (see enum rdma_create_ah_flags)
+ * @ibah: the IB address handle
+ * @init_attr: the attributes of the AH
+ * @udata: pointer to user data
*
* @return: 0 on success, otherwise errno.
*/
@@ -548,9 +548,10 @@ int pvrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
* @flags: destroy address handle flags (see enum rdma_destroy_ah_flags)
*
*/
-void pvrdma_destroy_ah(struct ib_ah *ah, u32 flags)
+int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags)
{
struct pvrdma_dev *dev = to_vdev(ah->device);
atomic_dec(&dev->num_ahs);
+ return 0;
}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
index 699b20849a7e..f0e5ffba2d51 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
@@ -176,7 +176,7 @@ struct pvrdma_port_attr {
u8 subnet_timeout;
u8 init_type_reply;
u8 active_width;
- u8 active_speed;
+ u16 active_speed;
u8 phys_state;
u8 reserved[2];
};
@@ -399,7 +399,7 @@ int pvrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
int pvrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata);
void pvrdma_dealloc_ucontext(struct ib_ucontext *context);
int pvrdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-void pvrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int pvrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
struct ib_mr *pvrdma_get_dma_mr(struct ib_pd *pd, int acc);
struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
@@ -411,19 +411,19 @@ int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
int sg_nents, unsigned int *sg_offset);
int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
-void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
+int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
int pvrdma_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
-void pvrdma_destroy_ah(struct ib_ah *ah, u32 flags);
+int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags);
int pvrdma_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr,
struct ib_udata *udata);
int pvrdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
int pvrdma_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
-void pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
+int pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c
index 75a04b1497c4..b938c4ffa99a 100644
--- a/drivers/infiniband/sw/rdmavt/ah.c
+++ b/drivers/infiniband/sw/rdmavt/ah.c
@@ -132,7 +132,7 @@ int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
*
* Return: 0 on success
*/
-void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags)
+int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags)
{
struct rvt_dev_info *dev = ib_to_rvt(ibah->device);
struct rvt_ah *ah = ibah_to_rvtah(ibah);
@@ -143,6 +143,7 @@ void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags)
spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
rdma_destroy_ah_attr(&ah->attr);
+ return 0;
}
/**
diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h
index 40b7123fec76..5a85edd06491 100644
--- a/drivers/infiniband/sw/rdmavt/ah.h
+++ b/drivers/infiniband/sw/rdmavt/ah.h
@@ -52,7 +52,7 @@
int rvt_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
-void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags);
+int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags);
int rvt_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
int rvt_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index 04d2e72017fe..19248be14093 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -315,7 +315,7 @@ bail_wc:
*
* Called by ib_destroy_cq() in the generic verbs code.
*/
-void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
struct rvt_dev_info *rdi = cq->rdi;
@@ -328,6 +328,7 @@ void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
kref_put(&cq->ip->ref, rvt_release_mmap_info);
else
vfree(cq->kqueue);
+ return 0;
}
/**
diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h
index 5e26a2eb19a4..feb01e7ee004 100644
--- a/drivers/infiniband/sw/rdmavt/cq.h
+++ b/drivers/infiniband/sw/rdmavt/cq.h
@@ -53,7 +53,7 @@
int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
-void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c
index a403718f0b5e..01b7abf91520 100644
--- a/drivers/infiniband/sw/rdmavt/pd.c
+++ b/drivers/infiniband/sw/rdmavt/pd.c
@@ -95,11 +95,12 @@ bail:
*
* Return: always 0
*/
-void rvt_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+int rvt_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct rvt_dev_info *dev = ib_to_rvt(ibpd->device);
spin_lock(&dev->n_pds_lock);
dev->n_pds_allocated--;
spin_unlock(&dev->n_pds_lock);
+ return 0;
}
diff --git a/drivers/infiniband/sw/rdmavt/pd.h b/drivers/infiniband/sw/rdmavt/pd.h
index 71ba76d72b1d..06a6a38beedc 100644
--- a/drivers/infiniband/sw/rdmavt/pd.h
+++ b/drivers/infiniband/sw/rdmavt/pd.h
@@ -51,6 +51,6 @@
#include <rdma/rdma_vt.h>
int rvt_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-void rvt_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int rvt_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
#endif /* DEF_RDMAVTPD_H */
diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c
index f547c115af03..64d98bf238ab 100644
--- a/drivers/infiniband/sw/rdmavt/srq.c
+++ b/drivers/infiniband/sw/rdmavt/srq.c
@@ -332,7 +332,7 @@ int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
* @ibsrq: srq object to destroy
*
*/
-void rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+int rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device);
@@ -343,4 +343,5 @@ void rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
if (srq->ip)
kref_put(&srq->ip->ref, rvt_release_mmap_info);
kvfree(srq->rq.kwq);
+ return 0;
}
diff --git a/drivers/infiniband/sw/rdmavt/srq.h b/drivers/infiniband/sw/rdmavt/srq.h
index 6427d7d62a9a..d5a1a053b1b9 100644
--- a/drivers/infiniband/sw/rdmavt/srq.h
+++ b/drivers/infiniband/sw/rdmavt/srq.h
@@ -56,6 +56,6 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask,
struct ib_udata *udata);
int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
-void rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
+int rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
#endif /* DEF_RVTSRQ_H */
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index f904bb34477a..52218684ad4a 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -95,9 +95,7 @@ struct rvt_dev_info *rvt_alloc_device(size_t size, int nports)
if (!rdi)
return rdi;
- rdi->ports = kcalloc(nports,
- sizeof(struct rvt_ibport **),
- GFP_KERNEL);
+ rdi->ports = kcalloc(nports, sizeof(*rdi->ports), GFP_KERNEL);
if (!rdi->ports)
ib_dealloc_device(&rdi->ibdev);
@@ -581,7 +579,9 @@ int rvt_register_device(struct rvt_dev_info *rdi)
spin_lock_init(&rdi->n_cqs_lock);
/* DMA Operations */
- rdi->ibdev.dev.dma_ops = rdi->ibdev.dev.dma_ops ? : &dma_virt_ops;
+ rdi->ibdev.dev.dma_parms = rdi->ibdev.dev.parent->dma_parms;
+ dma_set_coherent_mask(&rdi->ibdev.dev,
+ rdi->ibdev.dev.parent->coherent_dma_mask);
/* Protection Domain */
spin_lock_init(&rdi->n_pds_lock);
@@ -629,7 +629,7 @@ int rvt_register_device(struct rvt_dev_info *rdi)
rdi->ibdev.num_comp_vectors = 1;
/* We are now good to announce we exist */
- ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev));
+ ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev), NULL);
if (ret) {
rvt_pr_err(rdi, "Failed to register driver with ib core.\n");
goto bail_wss;
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 77f2c7cd1216..95f0de0c8b49 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <rdma/rdma_netlink.h>
@@ -279,6 +252,12 @@ static int rxe_newlink(const char *ibdev_name, struct net_device *ndev)
struct rxe_dev *exists;
int err = 0;
+ if (is_vlan_dev(ndev)) {
+ pr_err("rxe creation allowed on top of a real device only\n");
+ err = -EPERM;
+ goto err;
+ }
+
exists = rxe_get_dev_from_net(ndev);
if (exists) {
ib_device_put(&exists->ib_dev);
@@ -305,13 +284,6 @@ static int __init rxe_module_init(void)
{
int err;
- /* initialize slab caches for managed objects */
- err = rxe_cache_init();
- if (err) {
- pr_err("unable to init object pools\n");
- return err;
- }
-
err = rxe_net_init();
if (err)
return err;
@@ -327,7 +299,6 @@ static void __exit rxe_module_exit(void)
rdma_link_unregister(&rxe_link_ops);
ib_unregister_driver(RDMA_DRIVER_RXE);
rxe_net_exit();
- rxe_cache_exit();
rxe_initialized = false;
pr_info("unloaded\n");
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index cae1b0a24c85..623fd17df02d 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -1,34 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#ifndef RXE_H
diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c
index 81ee756c19b8..38021e2c8688 100644
--- a/drivers/infiniband/sw/rxe/rxe_av.c
+++ b/drivers/infiniband/sw/rxe/rxe_av.c
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include "rxe.h"
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 7b4df0028388..0a1e6393250b 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/skbuff.h>
@@ -690,9 +663,8 @@ int rxe_completer(void *arg)
*/
/* there is nothing to retry in this case */
- if (!wqe || (wqe->state == wqe_state_posted)) {
+ if (!wqe || (wqe->state == wqe_state_posted))
goto exit;
- }
/* if we've started a retry, don't start another
* retry sequence, unless this is a timeout.
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index ad3090131126..43394c3f29d4 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/vmalloc.h>
#include "rxe.h"
@@ -66,9 +39,9 @@ err1:
return -EINVAL;
}
-static void rxe_send_complete(unsigned long data)
+static void rxe_send_complete(struct tasklet_struct *t)
{
- struct rxe_cq *cq = (struct rxe_cq *)data;
+ struct rxe_cq *cq = from_tasklet(cq, t, comp_task);
unsigned long flags;
spin_lock_irqsave(&cq->cq_lock, flags);
@@ -107,7 +80,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
cq->is_dying = false;
- tasklet_init(&cq->comp_task, rxe_send_complete, (unsigned long)cq);
+ tasklet_setup(&cq->comp_task, rxe_send_complete);
spin_lock_init(&cq->cq_lock);
cq->ibcq.cqe = cqe;
diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h
index ce003666b800..3b483b75dfe3 100644
--- a/drivers/infiniband/sw/rxe/rxe_hdr.h
+++ b/drivers/infiniband/sw/rxe/rxe_hdr.h
@@ -1,34 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#ifndef RXE_HDR_H
diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
index 636edb5f4cf4..ac9154f0593d 100644
--- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c
+++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
@@ -1,33 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include "rxe.h"
diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.h b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
index 72c0d63c79e0..49ee6f96656d 100644
--- a/drivers/infiniband/sw/rxe/rxe_hw_counters.h
+++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
@@ -1,33 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#ifndef RXE_HW_COUNTERS_H
diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c
index 39e0be31aab1..66b2aad54bb7 100644
--- a/drivers/infiniband/sw/rxe/rxe_icrc.c
+++ b/drivers/infiniband/sw/rxe/rxe_icrc.c
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include "rxe.h"
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 39dc3bfa5d5d..0d758760b9ae 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -1,34 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#ifndef RXE_LOC_H
diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
index 522a7942c56c..c02315aed8d1 100644
--- a/drivers/infiniband/sw/rxe/rxe_mcast.c
+++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include "rxe.h"
diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c
index 7887f623f62c..035f226af133 100644
--- a/drivers/infiniband/sw/rxe/rxe_mmap.c
+++ b/drivers/infiniband/sw/rxe/rxe_mmap.c
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/module.h>
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index ce24144de16a..d2ce852447c1 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include "rxe.h"
@@ -79,13 +52,8 @@ static void rxe_mem_init(int access, struct rxe_mem *mem)
u32 lkey = mem->pelem.index << 8 | rxe_get_key();
u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;
- if (mem->pelem.pool->type == RXE_TYPE_MR) {
- mem->ibmr.lkey = lkey;
- mem->ibmr.rkey = rkey;
- }
-
- mem->lkey = lkey;
- mem->rkey = rkey;
+ mem->ibmr.lkey = lkey;
+ mem->ibmr.rkey = rkey;
mem->state = RXE_MEM_STATE_INVALID;
mem->type = RXE_MEM_TYPE_NONE;
mem->map_shift = ilog2(RXE_BUF_PER_MAP);
@@ -149,7 +117,7 @@ void rxe_mem_init_dma(struct rxe_pd *pd,
{
rxe_mem_init(access, mem);
- mem->pd = pd;
+ mem->ibmr.pd = &pd->ibpd;
mem->access = access;
mem->state = RXE_MEM_STATE_VALID;
mem->type = RXE_MEM_TYPE_DMA;
@@ -218,7 +186,7 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start,
}
}
- mem->pd = pd;
+ mem->ibmr.pd = &pd->ibpd;
mem->umem = umem;
mem->access = access;
mem->length = length;
@@ -248,7 +216,7 @@ int rxe_mem_init_fast(struct rxe_pd *pd,
if (err)
goto err1;
- mem->pd = pd;
+ mem->ibmr.pd = &pd->ibpd;
mem->max_buf = max_pages;
mem->state = RXE_MEM_STATE_FREE;
mem->type = RXE_MEM_TYPE_MR;
@@ -368,7 +336,7 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
memcpy(dest, src, length);
if (crcp)
- *crcp = rxe_crc32(to_rdev(mem->pd->ibpd.device),
+ *crcp = rxe_crc32(to_rdev(mem->ibmr.device),
*crcp, dest, length);
return 0;
@@ -402,7 +370,7 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
memcpy(dest, src, bytes);
if (crcp)
- crc = rxe_crc32(to_rdev(mem->pd->ibpd.device),
+ crc = rxe_crc32(to_rdev(mem->ibmr.device),
crc, dest, bytes);
length -= bytes;
@@ -575,9 +543,9 @@ struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
if (!mem)
return NULL;
- if (unlikely((type == lookup_local && mem->lkey != key) ||
- (type == lookup_remote && mem->rkey != key) ||
- mem->pd != pd ||
+ if (unlikely((type == lookup_local && mr_lkey(mem) != key) ||
+ (type == lookup_remote && mr_rkey(mem) != key) ||
+ mr_pd(mem) != pd ||
(access && !(access & mem->access)) ||
mem->state != RXE_MEM_STATE_VALID)) {
rxe_drop_ref(mem);
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 0c3808611f95..575e1a4ec821 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/skbuff.h>
@@ -120,7 +93,7 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev,
ndst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(recv_sockets.sk6->sk),
recv_sockets.sk6->sk, &fl6,
NULL);
- if (unlikely(IS_ERR(ndst))) {
+ if (IS_ERR(ndst)) {
pr_err_ratelimited("no route to %pI6\n", daddr);
return NULL;
}
@@ -160,14 +133,14 @@ static struct dst_entry *rxe_find_route(struct net_device *ndev,
if (dst)
dst_release(dst);
- if (av->network_type == RDMA_NETWORK_IPV4) {
+ if (av->network_type == RXE_NETWORK_TYPE_IPV4) {
struct in_addr *saddr;
struct in_addr *daddr;
saddr = &av->sgid_addr._sockaddr_in.sin_addr;
daddr = &av->dgid_addr._sockaddr_in.sin_addr;
dst = rxe_find_route4(ndev, saddr, daddr);
- } else if (av->network_type == RDMA_NETWORK_IPV6) {
+ } else if (av->network_type == RXE_NETWORK_TYPE_IPV6) {
struct in6_addr *saddr6;
struct in6_addr *daddr6;
@@ -469,7 +442,7 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
if (IS_ERR(attr))
return NULL;
- if (av->network_type == RDMA_NETWORK_IPV4)
+ if (av->network_type == RXE_NETWORK_TYPE_IPV6)
hdr_len = ETH_HLEN + sizeof(struct udphdr) +
sizeof(struct iphdr);
else
@@ -496,7 +469,7 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
skb->dev = ndev;
rcu_read_unlock();
- if (av->network_type == RDMA_NETWORK_IPV4)
+ if (av->network_type == RXE_NETWORK_TYPE_IPV4)
skb->protocol = htons(ETH_P_IP);
else
skb->protocol = htons(ETH_P_IPV6);
diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h
index 2ca71d3d245c..45d80d00f86b 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.h
+++ b/drivers/infiniband/sw/rxe/rxe_net.h
@@ -1,34 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#ifndef RXE_NET_H
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c
index 4cf11063e0b5..0cb4b01fd910 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.c
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.c
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <rdma/ib_pack.h>
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h
index 307604e9c78d..1041ac9a9233 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.h
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.h
@@ -1,34 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#ifndef RXE_OPCODE_H
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 2f381aeafcb5..25ab50d9b7c2 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -1,34 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#ifndef RXE_PARAM_H
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index fbcbac52290b..b374eb53e2fe 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include "rxe.h"
@@ -110,62 +83,6 @@ static inline const char *pool_name(struct rxe_pool *pool)
return rxe_type_info[pool->type].name;
}
-static inline struct kmem_cache *pool_cache(struct rxe_pool *pool)
-{
- return rxe_type_info[pool->type].cache;
-}
-
-static void rxe_cache_clean(size_t cnt)
-{
- int i;
- struct rxe_type_info *type;
-
- for (i = 0; i < cnt; i++) {
- type = &rxe_type_info[i];
- if (!(type->flags & RXE_POOL_NO_ALLOC)) {
- kmem_cache_destroy(type->cache);
- type->cache = NULL;
- }
- }
-}
-
-int rxe_cache_init(void)
-{
- int err;
- int i;
- size_t size;
- struct rxe_type_info *type;
-
- for (i = 0; i < RXE_NUM_TYPES; i++) {
- type = &rxe_type_info[i];
- size = ALIGN(type->size, RXE_POOL_ALIGN);
- if (!(type->flags & RXE_POOL_NO_ALLOC)) {
- type->cache =
- kmem_cache_create(type->name, size,
- RXE_POOL_ALIGN,
- RXE_POOL_CACHE_FLAGS, NULL);
- if (!type->cache) {
- pr_err("Unable to init kmem cache for %s\n",
- type->name);
- err = -ENOMEM;
- goto err1;
- }
- }
- }
-
- return 0;
-
-err1:
- rxe_cache_clean(i);
-
- return err;
-}
-
-void rxe_cache_exit(void)
-{
- rxe_cache_clean(RXE_NUM_TYPES);
-}
-
static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min)
{
int err = 0;
@@ -406,7 +323,7 @@ void *rxe_alloc(struct rxe_pool *pool)
if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
goto out_cnt;
- elem = kmem_cache_zalloc(pool_cache(pool),
+ elem = kzalloc(rxe_type_info[pool->type].size,
(pool->flags & RXE_POOL_ATOMIC) ?
GFP_ATOMIC : GFP_KERNEL);
if (!elem)
@@ -468,7 +385,7 @@ void rxe_elem_release(struct kref *kref)
pool->cleanup(elem);
if (!(pool->flags & RXE_POOL_NO_ALLOC))
- kmem_cache_free(pool_cache(pool), elem);
+ kfree(elem);
atomic_dec(&pool->num_elem);
ib_device_put(&pool->rxe->ib_dev);
rxe_pool_put(pool);
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index 2f2cff1cbe43..432745ffc8d4 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -1,34 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#ifndef RXE_POOL_H
@@ -69,7 +42,6 @@ struct rxe_type_info {
u32 min_index;
size_t key_offset;
size_t key_size;
- struct kmem_cache *cache;
};
extern struct rxe_type_info rxe_type_info[];
@@ -113,12 +85,6 @@ struct rxe_pool {
size_t key_size;
};
-/* initialize slab caches for managed objects */
-int rxe_cache_init(void);
-
-/* cleanup slab caches for managed objects */
-void rxe_cache_exit(void);
-
/* initialize a pool of objects with given limit on
* number of elements. gets parameters from rxe_type_info
* pool elements will be allocated out of a slab cache
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 6c11c3aeeca6..656a5b4be847 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/skbuff.h>
@@ -628,9 +601,8 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
if (mask & IB_QP_QKEY)
qp->attr.qkey = attr->qkey;
- if (mask & IB_QP_AV) {
+ if (mask & IB_QP_AV)
rxe_init_av(&attr->ah_attr, &qp->pri_av);
- }
if (mask & IB_QP_ALT_PATH) {
rxe_init_av(&attr->alt_ah_attr, &qp->alt_av);
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c
index 245040c3a35d..fa69241b1187 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.c
+++ b/drivers/infiniband/sw/rxe/rxe_queue.c
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must retailuce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/vmalloc.h>
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h
index 8ef17d617022..7d434a6837a7 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.h
+++ b/drivers/infiniband/sw/rxe/rxe_queue.h
@@ -1,34 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#ifndef RXE_QUEUE_H
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 7e123d3c4d09..c9984a28eecc 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/skbuff.h>
@@ -260,6 +233,8 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
struct rxe_mc_elem *mce;
struct rxe_qp *qp;
union ib_gid dgid;
+ struct sk_buff *per_qp_skb;
+ struct rxe_pkt_info *per_qp_pkt;
int err;
if (skb->protocol == htons(ETH_P_IP))
@@ -288,26 +263,44 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
if (err)
continue;
- /* if *not* the last qp in the list
- * increase the users of the skb then post to the next qp
+ /* for all but the last qp create a new clone of the
+ * skb and pass to the qp.
*/
if (mce->qp_list.next != &mcg->qp_list)
- skb_get(skb);
+ per_qp_skb = skb_clone(skb, GFP_ATOMIC);
+ else
+ per_qp_skb = skb;
+
+ if (unlikely(!per_qp_skb))
+ continue;
- pkt->qp = qp;
+ per_qp_pkt = SKB_TO_PKT(per_qp_skb);
+ per_qp_pkt->qp = qp;
rxe_add_ref(qp);
- rxe_rcv_pkt(pkt, skb);
+ rxe_rcv_pkt(per_qp_pkt, per_qp_skb);
}
spin_unlock_bh(&mcg->mcg_lock);
rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */
+ return;
+
err1:
kfree_skb(skb);
}
-static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
+/**
+ * rxe_chk_dgid - validate destination IP address
+ * @rxe: rxe device that received packet
+ * @skb: the received packet buffer
+ *
+ * Accept any loopback packets
+ * Extract IP address from packet and
+ * Accept if multicast packet
+ * Accept if matches an SGID table entry
+ */
+static int rxe_chk_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
{
struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
const struct ib_gid_attr *gid_attr;
@@ -325,6 +318,9 @@ static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
pdgid = (union ib_gid *)&ipv6_hdr(skb)->daddr;
}
+ if (rdma_is_multicast_addr((struct in6_addr *)pdgid))
+ return 0;
+
gid_attr = rdma_find_gid_by_port(&rxe->ib_dev, pdgid,
IB_GID_TYPE_ROCE_UDP_ENCAP,
1, skb->dev);
@@ -349,8 +345,8 @@ void rxe_rcv(struct sk_buff *skb)
if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES))
goto drop;
- if (rxe_match_dgid(rxe, skb) < 0) {
- pr_warn_ratelimited("failed matching dgid\n");
+ if (rxe_chk_dgid(rxe, skb) < 0) {
+ pr_warn_ratelimited("failed checking dgid\n");
goto drop;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 34df2b55e650..af3923bf0a36 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/skbuff.h>
@@ -644,8 +617,8 @@ next_wqe:
rmr->state = RXE_MEM_STATE_VALID;
rmr->access = wqe->wr.wr.reg.access;
- rmr->lkey = wqe->wr.wr.reg.key;
- rmr->rkey = wqe->wr.wr.reg.key;
+ rmr->ibmr.lkey = wqe->wr.wr.reg.key;
+ rmr->ibmr.rkey = wqe->wr.wr.reg.key;
rmr->iova = wqe->wr.wr.reg.mr->iova;
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index c4a8195bf670..c7e3b6a4af38 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/skbuff.h>
diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c
index d8459431534e..41b0d1e11baf 100644
--- a/drivers/infiniband/sw/rxe/rxe_srq.c
+++ b/drivers/infiniband/sw/rxe/rxe_srq.c
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/vmalloc.h>
diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c
index 2af31d421bfc..666202ddff48 100644
--- a/drivers/infiniband/sw/rxe/rxe_sysfs.c
+++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include "rxe.h"
@@ -78,6 +51,12 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
return -EINVAL;
}
+ if (is_vlan_dev(ndev)) {
+ pr_err("rxe creation allowed on top of a real device only\n");
+ err = -EPERM;
+ goto err;
+ }
+
exists = rxe_get_dev_from_net(ndev);
if (exists) {
ib_device_put(&exists->ib_dev);
diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index ecdac3f8fcc9..6951fdcb31bf 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/kernel.h>
@@ -55,12 +28,12 @@ int __rxe_do_task(struct rxe_task *task)
* a second caller finds the task already running
* but looks just after the last call to func
*/
-void rxe_do_task(unsigned long data)
+void rxe_do_task(struct tasklet_struct *t)
{
int cont;
int ret;
unsigned long flags;
- struct rxe_task *task = (struct rxe_task *)data;
+ struct rxe_task *task = from_tasklet(task, t, tasklet);
spin_lock_irqsave(&task->state_lock, flags);
switch (task->state) {
@@ -123,7 +96,7 @@ int rxe_init_task(void *obj, struct rxe_task *task,
snprintf(task->name, sizeof(task->name), "%s", name);
task->destroyed = false;
- tasklet_init(&task->tasklet, rxe_do_task, (unsigned long)task);
+ tasklet_setup(&task->tasklet, rxe_do_task);
task->state = TASK_STATE_START;
spin_lock_init(&task->state_lock);
@@ -159,7 +132,7 @@ void rxe_run_task(struct rxe_task *task, int sched)
if (sched)
tasklet_schedule(&task->tasklet);
else
- rxe_do_task((unsigned long)task);
+ rxe_do_task(&task->tasklet);
}
void rxe_disable_task(struct rxe_task *task)
diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h
index 08ff42d451c6..11d183fd3338 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.h
+++ b/drivers/infiniband/sw/rxe/rxe_task.h
@@ -1,34 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#ifndef RXE_TASK_H
@@ -60,7 +33,7 @@ struct rxe_task {
/*
* init rxe_task structure
* arg => parameter to pass to fcn
- * fcn => function to call until it returns != 0
+ * func => function to call until it returns != 0
*/
int rxe_init_task(void *obj, struct rxe_task *task,
void *arg, int (*func)(void *), char *name);
@@ -80,7 +53,7 @@ int __rxe_do_task(struct rxe_task *task);
* work to do someone must reschedule the task before
* leaving
*/
-void rxe_do_task(unsigned long data);
+void rxe_do_task(struct tasklet_struct *t);
/* run a task, else schedule it to run as a tasklet, The decision
* to run or schedule tasklet is based on the parameter sched.
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 8522e9a3e914..1fc022362fbe 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/dma-mapping.h>
@@ -175,11 +148,12 @@ static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
return rxe_add_to_pool(&rxe->pd_pool, &pd->pelem);
}
-static void rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct rxe_pd *pd = to_rpd(ibpd);
rxe_drop_ref(pd);
+ return 0;
}
static int rxe_create_ah(struct ib_ah *ibah,
@@ -227,11 +201,12 @@ static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
return 0;
}
-static void rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
+static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
{
struct rxe_ah *ah = to_rah(ibah);
rxe_drop_ref(ah);
+ return 0;
}
static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
@@ -365,7 +340,7 @@ static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
return 0;
}
-static void rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
struct rxe_srq *srq = to_rsrq(ibsrq);
@@ -374,6 +349,7 @@ static void rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
rxe_drop_ref(srq->pd);
rxe_drop_ref(srq);
+ return 0;
}
static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
@@ -803,13 +779,14 @@ static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
return rxe_add_to_pool(&rxe->cq_pool, &cq->pelem);
}
-static void rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct rxe_cq *cq = to_rcq(ibcq);
rxe_cq_disable(cq);
rxe_drop_ref(cq);
+ return 0;
}
static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
@@ -944,7 +921,7 @@ static int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
struct rxe_mem *mr = to_rmr(ibmr);
mr->state = RXE_MEM_STATE_ZOMBIE;
- rxe_drop_ref(mr->pd);
+ rxe_drop_ref(mr_pd(mr));
rxe_drop_index(mr);
rxe_drop_ref(mr);
return 0;
@@ -1151,12 +1128,9 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
dev->local_dma_lkey = 0;
addrconf_addr_eui48((unsigned char *)&dev->node_guid,
rxe->ndev->dev_addr);
- dev->dev.dma_ops = &dma_virt_ops;
dev->dev.dma_parms = &rxe->dma_parms;
- rxe->dma_parms = (struct device_dma_parameters)
- { .max_segment_size = SZ_2G };
- dma_coerce_mask_and_coherent(&dev->dev,
- dma_get_required_mask(&dev->dev));
+ dma_set_max_seg_size(&dev->dev, UINT_MAX);
+ dma_set_coherent_mask(&dev->dev, dma_get_required_mask(&dev->dev));
dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
| BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)
@@ -1205,7 +1179,7 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
rxe->tfm = tfm;
rdma_set_device_sysfs_group(dev, &rxe_attr_group);
- err = ib_register_device(dev, ibdev_name);
+ err = ib_register_device(dev, ibdev_name, NULL);
if (err)
pr_warn("%s failed with error %d\n", __func__, err);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index c664c7f36ab5..3414b341b709 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -1,34 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#ifndef RXE_VERBS_H
@@ -322,12 +295,8 @@ struct rxe_mem {
struct ib_mw ibmw;
};
- struct rxe_pd *pd;
struct ib_umem *umem;
- u32 lkey;
- u32 rkey;
-
enum rxe_mem_state state;
enum rxe_mem_type type;
u64 va;
@@ -465,6 +434,21 @@ static inline struct rxe_mem *to_rmw(struct ib_mw *mw)
return mw ? container_of(mw, struct rxe_mem, ibmw) : NULL;
}
+static inline struct rxe_pd *mr_pd(struct rxe_mem *mr)
+{
+ return to_rpd(mr->ibmr.pd);
+}
+
+static inline u32 mr_lkey(struct rxe_mem *mr)
+{
+ return mr->ibmr.lkey;
+}
+
+static inline u32 mr_rkey(struct rxe_mem *mr)
+{
+ return mr->ibmr.rkey;
+}
+
int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name);
void rxe_mc_cleanup(struct rxe_pool_entry *arg);
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index d862bec84376..ca8bc7296867 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -69,7 +69,7 @@ static int siw_device_register(struct siw_device *sdev, const char *name)
sdev->vendor_part_id = dev_id++;
- rv = ib_register_device(base_dev, name);
+ rv = ib_register_device(base_dev, name, NULL);
if (rv) {
pr_warn("siw: device registration error %d\n", rv);
return rv;
@@ -382,10 +382,10 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
*/
base_dev->phys_port_cnt = 1;
base_dev->dev.parent = parent;
- base_dev->dev.dma_ops = &dma_virt_ops;
base_dev->dev.dma_parms = &sdev->dma_parms;
- sdev->dma_parms = (struct device_dma_parameters)
- { .max_segment_size = SZ_2G };
+ dma_set_max_seg_size(&base_dev->dev, UINT_MAX);
+ dma_set_coherent_mask(&base_dev->dev,
+ dma_get_required_mask(&base_dev->dev));
base_dev->num_comp_vectors = num_possible_cpus();
xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1);
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index adafa1b8bebe..7cf3242ffb41 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -234,12 +234,13 @@ int siw_alloc_pd(struct ib_pd *pd, struct ib_udata *udata)
return 0;
}
-void siw_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+int siw_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
struct siw_device *sdev = to_siw_dev(pd->device);
siw_dbg_pd(pd, "free PD\n");
atomic_dec(&sdev->num_pd);
+ return 0;
}
void siw_qp_get_ref(struct ib_qp *base_qp)
@@ -1055,7 +1056,7 @@ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr,
return rv > 0 ? 0 : rv;
}
-void siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata)
+int siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata)
{
struct siw_cq *cq = to_siw_cq(base_cq);
struct siw_device *sdev = to_siw_dev(base_cq->device);
@@ -1073,6 +1074,7 @@ void siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata)
atomic_dec(&sdev->num_cq);
vfree(cq->queue);
+ return 0;
}
/*
@@ -1690,7 +1692,7 @@ int siw_query_srq(struct ib_srq *base_srq, struct ib_srq_attr *attrs)
* QP anymore - the code trusts the RDMA core environment to keep track
* of QP references.
*/
-void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata)
+int siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata)
{
struct siw_srq *srq = to_siw_srq(base_srq);
struct siw_device *sdev = to_siw_dev(base_srq->device);
@@ -1702,6 +1704,7 @@ void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata)
rdma_user_mmap_entry_remove(srq->srq_entry);
vfree(srq->recvq);
atomic_dec(&sdev->num_srq);
+ return 0;
}
/*
diff --git a/drivers/infiniband/sw/siw/siw_verbs.h b/drivers/infiniband/sw/siw/siw_verbs.h
index d9572275a6b6..637454529357 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.h
+++ b/drivers/infiniband/sw/siw/siw_verbs.h
@@ -49,7 +49,7 @@ int siw_query_port(struct ib_device *base_dev, u8 port,
int siw_query_gid(struct ib_device *base_dev, u8 port, int idx,
union ib_gid *gid);
int siw_alloc_pd(struct ib_pd *base_pd, struct ib_udata *udata);
-void siw_dealloc_pd(struct ib_pd *base_pd, struct ib_udata *udata);
+int siw_dealloc_pd(struct ib_pd *base_pd, struct ib_udata *udata);
struct ib_qp *siw_create_qp(struct ib_pd *base_pd,
struct ib_qp_init_attr *attr,
struct ib_udata *udata);
@@ -62,7 +62,7 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr);
int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr);
-void siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata);
+int siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata);
int siw_poll_cq(struct ib_cq *base_cq, int num_entries, struct ib_wc *wc);
int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags);
struct ib_mr *siw_reg_user_mr(struct ib_pd *base_pd, u64 start, u64 len,
@@ -78,7 +78,7 @@ int siw_create_srq(struct ib_srq *base_srq, struct ib_srq_init_attr *attr,
int siw_modify_srq(struct ib_srq *base_srq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask mask, struct ib_udata *udata);
int siw_query_srq(struct ib_srq *base_srq, struct ib_srq_attr *attr);
-void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata);
+int siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata);
int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr);
int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 7c41fb040f7c..8f0b598a46ec 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1647,17 +1647,13 @@ int ipoib_cm_dev_init(struct net_device *dev)
void ipoib_cm_dev_cleanup(struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
- int ret;
if (!priv->cm.srq)
return;
ipoib_dbg(priv, "Cleanup ipoib connected mode.\n");
- ret = ib_destroy_srq(priv->cm.srq);
- if (ret)
- ipoib_warn(priv, "ib_destroy_srq failed: %d\n", ret);
-
+ ib_destroy_srq(priv->cm.srq);
priv->cm.srq = NULL;
if (!priv->cm.srq_ring)
return;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 64c19f6fa931..12ba7a0fe0b5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -124,35 +124,14 @@ static int ipoib_mcg_seq_show(struct seq_file *file, void *iter_ptr)
return 0;
}
-static const struct seq_operations ipoib_mcg_seq_ops = {
+static const struct seq_operations ipoib_mcg_sops = {
.start = ipoib_mcg_seq_start,
.next = ipoib_mcg_seq_next,
.stop = ipoib_mcg_seq_stop,
.show = ipoib_mcg_seq_show,
};
-static int ipoib_mcg_open(struct inode *inode, struct file *file)
-{
- struct seq_file *seq;
- int ret;
-
- ret = seq_open(file, &ipoib_mcg_seq_ops);
- if (ret)
- return ret;
-
- seq = file->private_data;
- seq->private = inode->i_private;
-
- return 0;
-}
-
-static const struct file_operations ipoib_mcg_fops = {
- .owner = THIS_MODULE,
- .open = ipoib_mcg_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release
-};
+DEFINE_SEQ_ATTRIBUTE(ipoib_mcg);
static void *ipoib_path_seq_start(struct seq_file *file, loff_t *pos)
{
@@ -229,35 +208,14 @@ static int ipoib_path_seq_show(struct seq_file *file, void *iter_ptr)
return 0;
}
-static const struct seq_operations ipoib_path_seq_ops = {
+static const struct seq_operations ipoib_path_sops = {
.start = ipoib_path_seq_start,
.next = ipoib_path_seq_next,
.stop = ipoib_path_seq_stop,
.show = ipoib_path_seq_show,
};
-static int ipoib_path_open(struct inode *inode, struct file *file)
-{
- struct seq_file *seq;
- int ret;
-
- ret = seq_open(file, &ipoib_path_seq_ops);
- if (ret)
- return ret;
-
- seq = file->private_data;
- seq->private = inode->i_private;
-
- return 0;
-}
-
-static const struct file_operations ipoib_path_fops = {
- .owner = THIS_MODULE,
- .open = ipoib_path_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release
-};
+DEFINE_SEQ_ATTRIBUTE(ipoib_path);
void ipoib_create_debug_files(struct net_device *dev)
{
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index f772fe8c5b66..abfab89423f4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -2480,6 +2480,8 @@ static struct net_device *ipoib_add_port(const char *format,
/* call event handler to ensure pkey in sync */
queue_work(ipoib_workqueue, &priv->flush_heavy);
+ ndev->rtnl_link_ops = ipoib_get_link_ops();
+
result = register_netdev(ndev);
if (result) {
pr_warn("%s: couldn't register ipoib port %d; error %d\n",
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
index 38c984d16996..d5a90a66b45c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
@@ -144,6 +144,16 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
return 0;
}
+static void ipoib_del_child_link(struct net_device *dev, struct list_head *head)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ if (!priv->parent)
+ return;
+
+ unregister_netdevice_queue(dev, head);
+}
+
static size_t ipoib_get_size(const struct net_device *dev)
{
return nla_total_size(2) + /* IFLA_IPOIB_PKEY */
@@ -158,6 +168,7 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
.priv_size = sizeof(struct ipoib_dev_priv),
.setup = ipoib_setup_common,
.newlink = ipoib_new_child_link,
+ .dellink = ipoib_del_child_link,
.changelink = ipoib_changelink,
.get_size = ipoib_get_size,
.fill_info = ipoib_fill_info,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 30865605e098..4c50a87ed7cc 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -195,6 +195,8 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
}
priv = ipoib_priv(ndev);
+ ndev->rtnl_link_ops = ipoib_get_link_ops();
+
result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD);
if (result && ndev->reg_state == NETREG_UNINITIALIZED)
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 695f701dc43d..436e17f1d0e5 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -1141,12 +1141,7 @@ isert_handle_iscsi_dataout(struct isert_conn *isert_conn,
* multiple data-outs on the same command can arrive -
* so post the buffer before hand
*/
- rc = isert_post_recv(isert_conn, rx_desc);
- if (rc) {
- isert_err("ib_post_recv failed with %d\n", rc);
- return rc;
- }
- return 0;
+ return isert_post_recv(isert_conn, rx_desc);
}
static int
@@ -1723,10 +1718,8 @@ isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd)
int ret;
ret = isert_post_recv(isert_conn, isert_cmd->rx_desc);
- if (ret) {
- isert_err("ib_post_recv failed with %d\n", ret);
+ if (ret)
return ret;
- }
ret = ib_post_send(isert_conn->qp, &isert_cmd->tx_desc.send_wr, NULL);
if (ret) {
@@ -2098,10 +2091,8 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
&isert_cmd->tx_desc.send_wr);
rc = isert_post_recv(isert_conn, isert_cmd->rx_desc);
- if (rc) {
- isert_err("ib_post_recv failed with %d\n", rc);
+ if (rc)
return rc;
- }
chain_wr = &isert_cmd->tx_desc.send_wr;
}
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c
index 298b747d0330..ac4c49cbf153 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c
@@ -312,7 +312,7 @@ static struct attribute *rtrs_clt_stats_attrs[] = {
NULL
};
-static struct attribute_group rtrs_clt_stats_attr_group = {
+static const struct attribute_group rtrs_clt_stats_attr_group = {
.attrs = rtrs_clt_stats_attrs,
};
@@ -388,7 +388,7 @@ static struct attribute *rtrs_clt_sess_attrs[] = {
NULL,
};
-static struct attribute_group rtrs_clt_sess_attr_group = {
+static const struct attribute_group rtrs_clt_sess_attr_group = {
.attrs = rtrs_clt_sess_attrs,
};
@@ -460,7 +460,7 @@ static struct attribute *rtrs_clt_attrs[] = {
NULL,
};
-static struct attribute_group rtrs_clt_attr_group = {
+static const struct attribute_group rtrs_clt_attr_group = {
.attrs = rtrs_clt_attrs,
};
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
index 0a93c87ef92b..b8e43dc4d95a 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
@@ -115,7 +115,6 @@ struct rtrs_sess {
/* rtrs information unit */
struct rtrs_iu {
- struct list_head list;
struct ib_cqe cqe;
dma_addr_t dma_addr;
void *buf;
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
index cf6a2be61695..07fbb063555d 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
@@ -135,7 +135,7 @@ static struct attribute *rtrs_srv_sess_attrs[] = {
NULL,
};
-static struct attribute_group rtrs_srv_sess_attr_group = {
+static const struct attribute_group rtrs_srv_sess_attr_group = {
.attrs = rtrs_srv_sess_attrs,
};
@@ -148,7 +148,7 @@ static struct attribute *rtrs_srv_stats_attrs[] = {
NULL,
};
-static struct attribute_group rtrs_srv_stats_attr_group = {
+static const struct attribute_group rtrs_srv_stats_attr_group = {
.attrs = rtrs_srv_stats_attrs,
};
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
index 28f6414dfa3d..d6f93601712e 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
@@ -16,6 +16,7 @@
#include "rtrs-srv.h"
#include "rtrs-log.h"
#include <rdma/ib_cm.h>
+#include <rdma/ib_verbs.h>
MODULE_DESCRIPTION("RDMA Transport Server");
MODULE_LICENSE("GPL");
@@ -31,6 +32,7 @@ MODULE_LICENSE("GPL");
static struct rtrs_rdma_dev_pd dev_pd;
static mempool_t *chunk_pool;
struct class *rtrs_dev_class;
+static struct rtrs_srv_ib_ctx ib_ctx;
static int __read_mostly max_chunk_size = DEFAULT_MAX_CHUNK_SIZE;
static int __read_mostly sess_queue_depth = DEFAULT_SESS_QUEUE_DEPTH;
@@ -2042,6 +2044,70 @@ static void free_srv_ctx(struct rtrs_srv_ctx *ctx)
kfree(ctx);
}
+static int rtrs_srv_add_one(struct ib_device *device)
+{
+ struct rtrs_srv_ctx *ctx;
+ int ret = 0;
+
+ mutex_lock(&ib_ctx.ib_dev_mutex);
+ if (ib_ctx.ib_dev_count)
+ goto out;
+
+ /*
+ * Since our CM IDs are NOT bound to any ib device we will create them
+ * only once
+ */
+ ctx = ib_ctx.srv_ctx;
+ ret = rtrs_srv_rdma_init(ctx, ib_ctx.port);
+ if (ret) {
+ /*
+ * We errored out here.
+ * According to the ib code, if we encounter an error here then the
+ * error code is ignored, and no more calls to our ops are made.
+ */
+ pr_err("Failed to initialize RDMA connection");
+ goto err_out;
+ }
+
+out:
+ /*
+ * Keep a track on the number of ib devices added
+ */
+ ib_ctx.ib_dev_count++;
+
+err_out:
+ mutex_unlock(&ib_ctx.ib_dev_mutex);
+ return ret;
+}
+
+static void rtrs_srv_remove_one(struct ib_device *device, void *client_data)
+{
+ struct rtrs_srv_ctx *ctx;
+
+ mutex_lock(&ib_ctx.ib_dev_mutex);
+ ib_ctx.ib_dev_count--;
+
+ if (ib_ctx.ib_dev_count)
+ goto out;
+
+ /*
+ * Since our CM IDs are NOT bound to any ib device we will remove them
+ * only once, when the last device is removed
+ */
+ ctx = ib_ctx.srv_ctx;
+ rdma_destroy_id(ctx->cm_id_ip);
+ rdma_destroy_id(ctx->cm_id_ib);
+
+out:
+ mutex_unlock(&ib_ctx.ib_dev_mutex);
+}
+
+static struct ib_client rtrs_srv_client = {
+ .name = "rtrs_server",
+ .add = rtrs_srv_add_one,
+ .remove = rtrs_srv_remove_one
+};
+
/**
* rtrs_srv_open() - open RTRS server context
* @ops: callback functions
@@ -2060,7 +2126,11 @@ struct rtrs_srv_ctx *rtrs_srv_open(struct rtrs_srv_ops *ops, u16 port)
if (!ctx)
return ERR_PTR(-ENOMEM);
- err = rtrs_srv_rdma_init(ctx, port);
+ mutex_init(&ib_ctx.ib_dev_mutex);
+ ib_ctx.srv_ctx = ctx;
+ ib_ctx.port = port;
+
+ err = ib_register_client(&rtrs_srv_client);
if (err) {
free_srv_ctx(ctx);
return ERR_PTR(err);
@@ -2099,8 +2169,8 @@ static void close_ctx(struct rtrs_srv_ctx *ctx)
*/
void rtrs_srv_close(struct rtrs_srv_ctx *ctx)
{
- rdma_destroy_id(ctx->cm_id_ip);
- rdma_destroy_id(ctx->cm_id_ib);
+ ib_unregister_client(&rtrs_srv_client);
+ mutex_destroy(&ib_ctx.ib_dev_mutex);
close_ctx(ctx);
free_srv_ctx(ctx);
}
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.h b/drivers/infiniband/ulp/rtrs/rtrs-srv.h
index dc95b0932f0d..08b0b8a6eebe 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.h
@@ -118,6 +118,13 @@ struct rtrs_srv_ctx {
struct list_head srv_list;
};
+struct rtrs_srv_ib_ctx {
+ struct rtrs_srv_ctx *srv_ctx;
+ u16 port;
+ struct mutex ib_dev_mutex;
+ int ib_dev_count;
+};
+
extern struct class *rtrs_dev_class;
void close_sess(struct rtrs_srv_sess *sess);
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 2d70d56d8e0d..404b40af31cb 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1136,7 +1136,7 @@ error:
static void free_iommu(struct intel_iommu *iommu)
{
- if (intel_iommu_enabled && iommu->iommu.ops) {
+ if (intel_iommu_enabled && !iommu->drhd->ignored) {
iommu_device_unregister(&iommu->iommu);
iommu_device_sysfs_remove(&iommu->iommu);
}
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index 96684581a25d..94fb63a7b357 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -638,7 +638,7 @@ static void smu_expose_childs(struct work_struct *unused)
{
struct device_node *np;
- for (np = NULL; (np = of_get_next_child(smu->of_node, np)) != NULL;)
+ for_each_child_of_node(smu->of_node, np)
if (of_device_is_compatible(np, "smu-sensors"))
of_platform_device_create(np, "smu-sensors",
&smu->of_dev->dev);
@@ -1015,7 +1015,7 @@ static struct smu_sdbp_header *smu_create_sdb_partition(int id)
/* Note: Only allowed to return error code in pointers (using ERR_PTR)
* when interruptible is 1
*/
-const struct smu_sdbp_header *__smu_get_sdb_partition(int id,
+static const struct smu_sdbp_header *__smu_get_sdb_partition(int id,
unsigned int *size, int interruptible)
{
char pname[32];
diff --git a/drivers/macintosh/windfarm_lm75_sensor.c b/drivers/macintosh/windfarm_lm75_sensor.c
index 1e5fa09845e7..29f48c2028b6 100644
--- a/drivers/macintosh/windfarm_lm75_sensor.c
+++ b/drivers/macintosh/windfarm_lm75_sensor.c
@@ -152,8 +152,6 @@ static int wf_lm75_remove(struct i2c_client *client)
{
struct wf_lm75_sensor *lm = i2c_get_clientdata(client);
- DBG("wf_lm75: i2c detatch called for %s\n", lm->sens.name);
-
/* Mark client detached */
lm->i2c = NULL;
diff --git a/drivers/macintosh/windfarm_lm87_sensor.c b/drivers/macintosh/windfarm_lm87_sensor.c
index d011899c0a8a..9fab0b47cd3d 100644
--- a/drivers/macintosh/windfarm_lm87_sensor.c
+++ b/drivers/macintosh/windfarm_lm87_sensor.c
@@ -149,8 +149,6 @@ static int wf_lm87_remove(struct i2c_client *client)
{
struct wf_lm87_sensor *lm = i2c_get_clientdata(client);
- DBG("wf_lm87: i2c detatch called for %s\n", lm->sens.name);
-
/* Mark client detached */
lm->i2c = NULL;
diff --git a/drivers/macintosh/windfarm_smu_sat.c b/drivers/macintosh/windfarm_smu_sat.c
index cb75dc035616..e46e1153a0b4 100644
--- a/drivers/macintosh/windfarm_smu_sat.c
+++ b/drivers/macintosh/windfarm_smu_sat.c
@@ -216,8 +216,7 @@ static int wf_sat_probe(struct i2c_client *client,
vsens[0] = vsens[1] = -1;
isens[0] = isens[1] = -1;
- child = NULL;
- while ((child = of_get_next_child(dev, child)) != NULL) {
+ for_each_child_of_node(dev, child) {
reg = of_get_property(child, "reg", NULL);
loc = of_get_property(child, "location", NULL);
if (reg == NULL || loc == NULL)
diff --git a/drivers/macintosh/windfarm_smu_sensors.c b/drivers/macintosh/windfarm_smu_sensors.c
index 3e6059eaa138..c8706cfb83fd 100644
--- a/drivers/macintosh/windfarm_smu_sensors.c
+++ b/drivers/macintosh/windfarm_smu_sensors.c
@@ -421,8 +421,7 @@ static int __init smu_sensors_init(void)
return -ENODEV;
/* Look for sensors subdir */
- for (sensors = NULL;
- (sensors = of_get_next_child(smu, sensors)) != NULL;)
+ for_each_child_of_node(smu, sensors)
if (of_node_name_eq(sensors, "sensors"))
break;
diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile
index 60d224b723a1..2e06e02b2e03 100644
--- a/drivers/mailbox/Makefile
+++ b/drivers/mailbox/Makefile
@@ -5,7 +5,7 @@ obj-$(CONFIG_MAILBOX) += mailbox.o
obj-$(CONFIG_MAILBOX_TEST) += mailbox-test.o
-obj-$(CONFIG_ARM_MHU) += arm_mhu.o
+obj-$(CONFIG_ARM_MHU) += arm_mhu.o arm_mhu_db.o
obj-$(CONFIG_IMX_MBOX) += imx-mailbox.o
diff --git a/drivers/mailbox/arm_mhu.c b/drivers/mailbox/arm_mhu.c
index 9da236552bd7..b7fbf276eb62 100644
--- a/drivers/mailbox/arm_mhu.c
+++ b/drivers/mailbox/arm_mhu.c
@@ -113,6 +113,9 @@ static int mhu_probe(struct amba_device *adev, const struct amba_id *id)
struct device *dev = &adev->dev;
int mhu_reg[MHU_CHANS] = {MHU_LP_OFFSET, MHU_HP_OFFSET, MHU_SEC_OFFSET};
+ if (!of_device_is_compatible(dev->of_node, "arm,mhu"))
+ return -ENODEV;
+
/* Allocate memory for device */
mhu = devm_kzalloc(dev, sizeof(*mhu), GFP_KERNEL);
if (!mhu)
diff --git a/drivers/mailbox/arm_mhu_db.c b/drivers/mailbox/arm_mhu_db.c
new file mode 100644
index 000000000000..275efe4cca0c
--- /dev/null
+++ b/drivers/mailbox/arm_mhu_db.c
@@ -0,0 +1,354 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2013-2015 Fujitsu Semiconductor Ltd.
+ * Copyright (C) 2015 Linaro Ltd.
+ * Based on ARM MHU driver by Jassi Brar <jaswinder.singh@linaro.org>
+ * Copyright (C) 2020 ARM Ltd.
+ */
+
+#include <linux/amba/bus.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/mailbox_controller.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#define INTR_STAT_OFS 0x0
+#define INTR_SET_OFS 0x8
+#define INTR_CLR_OFS 0x10
+
+#define MHU_LP_OFFSET 0x0
+#define MHU_HP_OFFSET 0x20
+#define MHU_SEC_OFFSET 0x200
+#define TX_REG_OFFSET 0x100
+
+#define MHU_CHANS 3 /* Secure, Non-Secure High and Low Priority */
+#define MHU_CHAN_MAX 20 /* Max channels to save on unused RAM */
+#define MHU_NUM_DOORBELLS 32
+
+struct mhu_db_link {
+ unsigned int irq;
+ void __iomem *tx_reg;
+ void __iomem *rx_reg;
+};
+
+struct arm_mhu {
+ void __iomem *base;
+ struct mhu_db_link mlink[MHU_CHANS];
+ struct mbox_controller mbox;
+ struct device *dev;
+};
+
+/**
+ * ARM MHU Mailbox allocated channel information
+ *
+ * @mhu: Pointer to parent mailbox device
+ * @pchan: Physical channel within which this doorbell resides in
+ * @doorbell: doorbell number pertaining to this channel
+ */
+struct mhu_db_channel {
+ struct arm_mhu *mhu;
+ unsigned int pchan;
+ unsigned int doorbell;
+};
+
+static inline struct mbox_chan *
+mhu_db_mbox_to_channel(struct mbox_controller *mbox, unsigned int pchan,
+ unsigned int doorbell)
+{
+ int i;
+ struct mhu_db_channel *chan_info;
+
+ for (i = 0; i < mbox->num_chans; i++) {
+ chan_info = mbox->chans[i].con_priv;
+ if (chan_info && chan_info->pchan == pchan &&
+ chan_info->doorbell == doorbell)
+ return &mbox->chans[i];
+ }
+
+ return NULL;
+}
+
+static void mhu_db_mbox_clear_irq(struct mbox_chan *chan)
+{
+ struct mhu_db_channel *chan_info = chan->con_priv;
+ void __iomem *base = chan_info->mhu->mlink[chan_info->pchan].rx_reg;
+
+ writel_relaxed(BIT(chan_info->doorbell), base + INTR_CLR_OFS);
+}
+
+static unsigned int mhu_db_mbox_irq_to_pchan_num(struct arm_mhu *mhu, int irq)
+{
+ unsigned int pchan;
+
+ for (pchan = 0; pchan < MHU_CHANS; pchan++)
+ if (mhu->mlink[pchan].irq == irq)
+ break;
+ return pchan;
+}
+
+static struct mbox_chan *
+mhu_db_mbox_irq_to_channel(struct arm_mhu *mhu, unsigned int pchan)
+{
+ unsigned long bits;
+ unsigned int doorbell;
+ struct mbox_chan *chan = NULL;
+ struct mbox_controller *mbox = &mhu->mbox;
+ void __iomem *base = mhu->mlink[pchan].rx_reg;
+
+ bits = readl_relaxed(base + INTR_STAT_OFS);
+ if (!bits)
+ /* No IRQs fired in specified physical channel */
+ return NULL;
+
+ /* An IRQ has fired, find the associated channel */
+ for (doorbell = 0; bits; doorbell++) {
+ if (!test_and_clear_bit(doorbell, &bits))
+ continue;
+
+ chan = mhu_db_mbox_to_channel(mbox, pchan, doorbell);
+ if (chan)
+ break;
+ dev_err(mbox->dev,
+ "Channel not registered: pchan: %d doorbell: %d\n",
+ pchan, doorbell);
+ }
+
+ return chan;
+}
+
+static irqreturn_t mhu_db_mbox_rx_handler(int irq, void *data)
+{
+ struct mbox_chan *chan;
+ struct arm_mhu *mhu = data;
+ unsigned int pchan = mhu_db_mbox_irq_to_pchan_num(mhu, irq);
+
+ while (NULL != (chan = mhu_db_mbox_irq_to_channel(mhu, pchan))) {
+ mbox_chan_received_data(chan, NULL);
+ mhu_db_mbox_clear_irq(chan);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static bool mhu_db_last_tx_done(struct mbox_chan *chan)
+{
+ struct mhu_db_channel *chan_info = chan->con_priv;
+ void __iomem *base = chan_info->mhu->mlink[chan_info->pchan].tx_reg;
+
+ if (readl_relaxed(base + INTR_STAT_OFS) & BIT(chan_info->doorbell))
+ return false;
+
+ return true;
+}
+
+static int mhu_db_send_data(struct mbox_chan *chan, void *data)
+{
+ struct mhu_db_channel *chan_info = chan->con_priv;
+ void __iomem *base = chan_info->mhu->mlink[chan_info->pchan].tx_reg;
+
+ /* Send event to co-processor */
+ writel_relaxed(BIT(chan_info->doorbell), base + INTR_SET_OFS);
+
+ return 0;
+}
+
+static int mhu_db_startup(struct mbox_chan *chan)
+{
+ mhu_db_mbox_clear_irq(chan);
+ return 0;
+}
+
+static void mhu_db_shutdown(struct mbox_chan *chan)
+{
+ struct mhu_db_channel *chan_info = chan->con_priv;
+ struct mbox_controller *mbox = &chan_info->mhu->mbox;
+ int i;
+
+ for (i = 0; i < mbox->num_chans; i++)
+ if (chan == &mbox->chans[i])
+ break;
+
+ if (mbox->num_chans == i) {
+ dev_warn(mbox->dev, "Request to free non-existent channel\n");
+ return;
+ }
+
+ /* Reset channel */
+ mhu_db_mbox_clear_irq(chan);
+ kfree(chan->con_priv);
+ chan->con_priv = NULL;
+}
+
+static struct mbox_chan *mhu_db_mbox_xlate(struct mbox_controller *mbox,
+ const struct of_phandle_args *spec)
+{
+ struct arm_mhu *mhu = dev_get_drvdata(mbox->dev);
+ struct mhu_db_channel *chan_info;
+ struct mbox_chan *chan;
+ unsigned int pchan = spec->args[0];
+ unsigned int doorbell = spec->args[1];
+ int i;
+
+ /* Bounds checking */
+ if (pchan >= MHU_CHANS || doorbell >= MHU_NUM_DOORBELLS) {
+ dev_err(mbox->dev,
+ "Invalid channel requested pchan: %d doorbell: %d\n",
+ pchan, doorbell);
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* Is requested channel free? */
+ chan = mhu_db_mbox_to_channel(mbox, pchan, doorbell);
+ if (chan) {
+ dev_err(mbox->dev, "Channel in use: pchan: %d doorbell: %d\n",
+ pchan, doorbell);
+ return ERR_PTR(-EBUSY);
+ }
+
+ /* Find the first free slot */
+ for (i = 0; i < mbox->num_chans; i++)
+ if (!mbox->chans[i].con_priv)
+ break;
+
+ if (mbox->num_chans == i) {
+ dev_err(mbox->dev, "No free channels left\n");
+ return ERR_PTR(-EBUSY);
+ }
+
+ chan = &mbox->chans[i];
+
+ chan_info = devm_kzalloc(mbox->dev, sizeof(*chan_info), GFP_KERNEL);
+ if (!chan_info)
+ return ERR_PTR(-ENOMEM);
+
+ chan_info->mhu = mhu;
+ chan_info->pchan = pchan;
+ chan_info->doorbell = doorbell;
+
+ chan->con_priv = chan_info;
+
+ dev_dbg(mbox->dev, "mbox: created channel phys: %d doorbell: %d\n",
+ pchan, doorbell);
+
+ return chan;
+}
+
+static const struct mbox_chan_ops mhu_db_ops = {
+ .send_data = mhu_db_send_data,
+ .startup = mhu_db_startup,
+ .shutdown = mhu_db_shutdown,
+ .last_tx_done = mhu_db_last_tx_done,
+};
+
+static int mhu_db_probe(struct amba_device *adev, const struct amba_id *id)
+{
+ u32 cell_count;
+ int i, err, max_chans;
+ struct arm_mhu *mhu;
+ struct mbox_chan *chans;
+ struct device *dev = &adev->dev;
+ struct device_node *np = dev->of_node;
+ int mhu_reg[MHU_CHANS] = {
+ MHU_LP_OFFSET, MHU_HP_OFFSET, MHU_SEC_OFFSET,
+ };
+
+ if (!of_device_is_compatible(np, "arm,mhu-doorbell"))
+ return -ENODEV;
+
+ err = of_property_read_u32(np, "#mbox-cells", &cell_count);
+ if (err) {
+ dev_err(dev, "failed to read #mbox-cells in '%pOF'\n", np);
+ return err;
+ }
+
+ if (cell_count == 2) {
+ max_chans = MHU_CHAN_MAX;
+ } else {
+ dev_err(dev, "incorrect value of #mbox-cells in '%pOF'\n", np);
+ return -EINVAL;
+ }
+
+ mhu = devm_kzalloc(dev, sizeof(*mhu), GFP_KERNEL);
+ if (!mhu)
+ return -ENOMEM;
+
+ mhu->base = devm_ioremap_resource(dev, &adev->res);
+ if (IS_ERR(mhu->base)) {
+ dev_err(dev, "ioremap failed\n");
+ return PTR_ERR(mhu->base);
+ }
+
+ chans = devm_kcalloc(dev, max_chans, sizeof(*chans), GFP_KERNEL);
+ if (!chans)
+ return -ENOMEM;
+
+ mhu->dev = dev;
+ mhu->mbox.dev = dev;
+ mhu->mbox.chans = chans;
+ mhu->mbox.num_chans = max_chans;
+ mhu->mbox.txdone_irq = false;
+ mhu->mbox.txdone_poll = true;
+ mhu->mbox.txpoll_period = 1;
+
+ mhu->mbox.of_xlate = mhu_db_mbox_xlate;
+ amba_set_drvdata(adev, mhu);
+
+ mhu->mbox.ops = &mhu_db_ops;
+
+ err = devm_mbox_controller_register(dev, &mhu->mbox);
+ if (err) {
+ dev_err(dev, "Failed to register mailboxes %d\n", err);
+ return err;
+ }
+
+ for (i = 0; i < MHU_CHANS; i++) {
+ int irq = mhu->mlink[i].irq = adev->irq[i];
+
+ if (irq <= 0) {
+ dev_dbg(dev, "No IRQ found for Channel %d\n", i);
+ continue;
+ }
+
+ mhu->mlink[i].rx_reg = mhu->base + mhu_reg[i];
+ mhu->mlink[i].tx_reg = mhu->mlink[i].rx_reg + TX_REG_OFFSET;
+
+ err = devm_request_threaded_irq(dev, irq, NULL,
+ mhu_db_mbox_rx_handler,
+ IRQF_ONESHOT, "mhu_db_link", mhu);
+ if (err) {
+ dev_err(dev, "Can't claim IRQ %d\n", irq);
+ mbox_controller_unregister(&mhu->mbox);
+ return err;
+ }
+ }
+
+ dev_info(dev, "ARM MHU Doorbell mailbox registered\n");
+ return 0;
+}
+
+static struct amba_id mhu_ids[] = {
+ {
+ .id = 0x1bb098,
+ .mask = 0xffffff,
+ },
+ { 0, 0 },
+};
+MODULE_DEVICE_TABLE(amba, mhu_ids);
+
+static struct amba_driver arm_mhu_db_driver = {
+ .drv = {
+ .name = "mhu-doorbell",
+ },
+ .id_table = mhu_ids,
+ .probe = mhu_db_probe,
+};
+module_amba_driver(arm_mhu_db_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("ARM MHU Doorbell Driver");
+MODULE_AUTHOR("Sudeep Holla <sudeep.holla@arm.com>");
diff --git a/drivers/mailbox/bcm-pdc-mailbox.c b/drivers/mailbox/bcm-pdc-mailbox.c
index 53945ca5d785..5b375985f7b8 100644
--- a/drivers/mailbox/bcm-pdc-mailbox.c
+++ b/drivers/mailbox/bcm-pdc-mailbox.c
@@ -962,9 +962,9 @@ static irqreturn_t pdc_irq_handler(int irq, void *data)
* a DMA receive interrupt. Reenables the receive interrupt.
* @data: PDC state structure
*/
-static void pdc_tasklet_cb(unsigned long data)
+static void pdc_tasklet_cb(struct tasklet_struct *t)
{
- struct pdc_state *pdcs = (struct pdc_state *)data;
+ struct pdc_state *pdcs = from_tasklet(pdcs, t, rx_tasklet);
pdc_receive(pdcs);
@@ -1589,7 +1589,7 @@ static int pdc_probe(struct platform_device *pdev)
pdc_hw_init(pdcs);
/* Init tasklet for deferred DMA rx processing */
- tasklet_init(&pdcs->rx_tasklet, pdc_tasklet_cb, (unsigned long)pdcs);
+ tasklet_setup(&pdcs->rx_tasklet, pdc_tasklet_cb);
err = pdc_interrupts_init(pdcs);
if (err)
diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c
index 0b821a5b2db8..3e7d4b20ab34 100644
--- a/drivers/mailbox/mailbox.c
+++ b/drivers/mailbox/mailbox.c
@@ -82,9 +82,12 @@ static void msg_submit(struct mbox_chan *chan)
exit:
spin_unlock_irqrestore(&chan->lock, flags);
- if (!err && (chan->txdone_method & TXDONE_BY_POLL))
- /* kick start the timer immediately to avoid delays */
- hrtimer_start(&chan->mbox->poll_hrt, 0, HRTIMER_MODE_REL);
+ /* kick start the timer immediately to avoid delays */
+ if (!err && (chan->txdone_method & TXDONE_BY_POLL)) {
+ /* but only if not already active */
+ if (!hrtimer_active(&chan->mbox->poll_hrt))
+ hrtimer_start(&chan->mbox->poll_hrt, 0, HRTIMER_MODE_REL);
+ }
}
static void tx_tick(struct mbox_chan *chan, int r)
@@ -122,11 +125,10 @@ static enum hrtimer_restart txdone_hrtimer(struct hrtimer *hrtimer)
struct mbox_chan *chan = &mbox->chans[i];
if (chan->active_req && chan->cl) {
+ resched = true;
txdone = chan->mbox->ops->last_tx_done(chan);
if (txdone)
tx_tick(chan, 0);
- else
- resched = true;
}
}
diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c
index 484d4438cd83..5665b6ea8119 100644
--- a/drivers/mailbox/mtk-cmdq-mailbox.c
+++ b/drivers/mailbox/mtk-cmdq-mailbox.c
@@ -69,7 +69,7 @@ struct cmdq_task {
struct cmdq {
struct mbox_controller mbox;
void __iomem *base;
- u32 irq;
+ int irq;
u32 thread_nr;
u32 irq_mask;
struct cmdq_thread *thread;
@@ -525,10 +525,8 @@ static int cmdq_probe(struct platform_device *pdev)
}
cmdq->irq = platform_get_irq(pdev, 0);
- if (!cmdq->irq) {
- dev_err(dev, "failed to get irq\n");
- return -EINVAL;
- }
+ if (cmdq->irq < 0)
+ return cmdq->irq;
plat_data = (struct gce_plat *)of_device_get_match_data(dev);
if (!plat_data) {
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 25a9dd9c0c1b..2ba899f5659f 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -393,8 +393,8 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
*capp_unit_id = get_capp_unit_id(np, *phb_index);
of_node_put(np);
if (!*capp_unit_id) {
- pr_err("cxl: invalid capp unit id (phb_index: %d)\n",
- *phb_index);
+ pr_err("cxl: No capp unit found for PHB[%lld,%d]. Make sure the adapter is on a capi-compatible slot\n",
+ *chipid, *phb_index);
return -ENODEV;
}
diff --git a/drivers/misc/ocxl/Kconfig b/drivers/misc/ocxl/Kconfig
index 947294f6d7f4..c9b0a27caf64 100644
--- a/drivers/misc/ocxl/Kconfig
+++ b/drivers/misc/ocxl/Kconfig
@@ -9,7 +9,7 @@ config OCXL_BASE
config OCXL
tristate "OpenCAPI coherent accelerator support"
- depends on PPC_POWERNV && PCI && EEH && HOTPLUG_PCI_POWERNV
+ depends on HOTPLUG_PCI_POWERNV
select OCXL_BASE
default m
help
diff --git a/drivers/misc/ocxl/afu_irq.c b/drivers/misc/ocxl/afu_irq.c
index 70f8f1c3929d..ecdcfae025b7 100644
--- a/drivers/misc/ocxl/afu_irq.c
+++ b/drivers/misc/ocxl/afu_irq.c
@@ -2,6 +2,7 @@
// Copyright 2017 IBM Corp.
#include <linux/interrupt.h>
#include <asm/pnv-ocxl.h>
+#include <asm/xive.h>
#include "ocxl_internal.h"
#include "trace.h"
@@ -10,7 +11,6 @@ struct afu_irq {
int hw_irq;
unsigned int virq;
char *name;
- u64 trigger_page;
irqreturn_t (*handler)(void *private);
void (*free_private)(void *private);
void *private;
@@ -124,8 +124,7 @@ int ocxl_afu_irq_alloc(struct ocxl_context *ctx, int *irq_id)
goto err_unlock;
}
- rc = ocxl_link_irq_alloc(ctx->afu->fn->link, &irq->hw_irq,
- &irq->trigger_page);
+ rc = ocxl_link_irq_alloc(ctx->afu->fn->link, &irq->hw_irq);
if (rc)
goto err_idr;
@@ -196,13 +195,16 @@ void ocxl_afu_irq_free_all(struct ocxl_context *ctx)
u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, int irq_id)
{
+ struct xive_irq_data *xd;
struct afu_irq *irq;
u64 addr = 0;
mutex_lock(&ctx->irq_lock);
irq = idr_find(&ctx->irq_idr, irq_id);
- if (irq)
- addr = irq->trigger_page;
+ if (irq) {
+ xd = irq_get_handler_data(irq->virq);
+ addr = xd ? xd->trig_page : 0;
+ }
mutex_unlock(&ctx->irq_lock);
return addr;
}
diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
index 58d111afd9f6..fd73d3bc0eb6 100644
--- a/drivers/misc/ocxl/link.c
+++ b/drivers/misc/ocxl/link.c
@@ -6,6 +6,7 @@
#include <linux/mmu_context.h>
#include <asm/copro.h>
#include <asm/pnv-ocxl.h>
+#include <asm/xive.h>
#include <misc/ocxl.h>
#include "ocxl_internal.h"
#include "trace.h"
@@ -682,23 +683,21 @@ unlock:
}
EXPORT_SYMBOL_GPL(ocxl_link_remove_pe);
-int ocxl_link_irq_alloc(void *link_handle, int *hw_irq, u64 *trigger_addr)
+int ocxl_link_irq_alloc(void *link_handle, int *hw_irq)
{
struct ocxl_link *link = (struct ocxl_link *) link_handle;
- int rc, irq;
- u64 addr;
+ int irq;
if (atomic_dec_if_positive(&link->irq_available) < 0)
return -ENOSPC;
- rc = pnv_ocxl_alloc_xive_irq(&irq, &addr);
- if (rc) {
+ irq = xive_native_alloc_irq();
+ if (!irq) {
atomic_inc(&link->irq_available);
- return rc;
+ return -ENXIO;
}
*hw_irq = irq;
- *trigger_addr = addr;
return 0;
}
EXPORT_SYMBOL_GPL(ocxl_link_irq_alloc);
@@ -707,7 +706,7 @@ void ocxl_link_free_irq(void *link_handle, int hw_irq)
{
struct ocxl_link *link = (struct ocxl_link *) link_handle;
- pnv_ocxl_free_xive_irq(hw_irq);
+ xive_native_free_irq(hw_irq);
atomic_inc(&link->irq_available);
}
EXPORT_SYMBOL_GPL(ocxl_link_free_irq);
diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index f96287c4b789..0f4c2d823de8 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -91,7 +91,7 @@ config MTD_MCHP23K256
config MTD_SPEAR_SMI
tristate "SPEAR MTD NOR Support through SMI controller"
- depends on PLAT_SPEAR
+ depends on PLAT_SPEAR || COMPILE_TEST
default y
help
This enable SNOR support on SPEAR platforms using SMI controller
diff --git a/drivers/mtd/devices/lart.c b/drivers/mtd/devices/lart.c
index 56f50d27b7fd..aecd441e4183 100644
--- a/drivers/mtd/devices/lart.c
+++ b/drivers/mtd/devices/lart.c
@@ -436,7 +436,10 @@ static int flash_read (struct mtd_info *mtd,loff_t from,size_t len,size_t *retle
{
int gap = BUSWIDTH - (from & (BUSWIDTH - 1));
- while (len && gap--) *buf++ = read8 (from++), len--;
+ while (len && gap--) {
+ *buf++ = read8 (from++);
+ len--;
+ }
}
/* now we read dwords until we reach a non-dword boundary */
@@ -518,7 +521,10 @@ static int flash_write (struct mtd_info *mtd,loff_t to,size_t len,size_t *retlen
i = n = 0;
while (gap--) tmp[i++] = 0xFF;
- while (len && i < BUSWIDTH) tmp[i++] = buf[n++], len--;
+ while (len && i < BUSWIDTH) {
+ tmp[i++] = buf[n++];
+ len--;
+ }
while (i < BUSWIDTH) tmp[i++] = 0xFF;
if (!write_dword (aligned,*((__u32 *) tmp))) return (-EIO);
diff --git a/drivers/mtd/devices/spear_smi.c b/drivers/mtd/devices/spear_smi.c
index 79dcca16481d..2e00862389dd 100644
--- a/drivers/mtd/devices/spear_smi.c
+++ b/drivers/mtd/devices/spear_smi.c
@@ -793,7 +793,7 @@ static int spear_smi_probe_config_dt(struct platform_device *pdev,
struct device_node *np)
{
struct spear_smi_plat_data *pdata = dev_get_platdata(&pdev->dev);
- struct device_node *pp = NULL;
+ struct device_node *pp;
const __be32 *addr;
u32 val;
int len;
@@ -812,7 +812,7 @@ static int spear_smi_probe_config_dt(struct platform_device *pdev,
return -ENOMEM;
/* Fill structs for each subnode (flash device) */
- while ((pp = of_get_next_child(np, pp))) {
+ for_each_child_of_node(np, pp) {
pdata->np[i] = pp;
/* Read base-addr and size from DT */
diff --git a/drivers/mtd/hyperbus/Kconfig b/drivers/mtd/hyperbus/Kconfig
index a4d8968d133d..46c7e407e378 100644
--- a/drivers/mtd/hyperbus/Kconfig
+++ b/drivers/mtd/hyperbus/Kconfig
@@ -22,4 +22,11 @@ config HBMC_AM654
This is the driver for HyperBus controller on TI's AM65x and
other SoCs
+config RPCIF_HYPERBUS
+ tristate "Renesas RPC-IF HyperBus driver"
+ depends on RENESAS_RPCIF
+ depends on MTD_CFI_BE_BYTE_SWAP
+ help
+ This option includes Renesas RPC-IF HyperBus support.
+
endif # MTD_HYPERBUS
diff --git a/drivers/mtd/hyperbus/Makefile b/drivers/mtd/hyperbus/Makefile
index 8a936e066f48..5fc2b5124542 100644
--- a/drivers/mtd/hyperbus/Makefile
+++ b/drivers/mtd/hyperbus/Makefile
@@ -2,3 +2,4 @@
obj-$(CONFIG_MTD_HYPERBUS) += hyperbus-core.o
obj-$(CONFIG_HBMC_AM654) += hbmc-am654.o
+obj-$(CONFIG_RPCIF_HYPERBUS) += rpc-if.o
diff --git a/drivers/mtd/hyperbus/hbmc-am654.c b/drivers/mtd/hyperbus/hbmc-am654.c
index e0e33f6bf513..a3439b791eeb 100644
--- a/drivers/mtd/hyperbus/hbmc-am654.c
+++ b/drivers/mtd/hyperbus/hbmc-am654.c
@@ -3,6 +3,10 @@
// Copyright (C) 2019 Texas Instruments Incorporated - https://www.ti.com/
// Author: Vignesh Raghavendra <vigneshr@ti.com>
+#include <linux/completion.h>
+#include <linux/dma-direction.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -13,11 +17,18 @@
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/platform_device.h>
-#include <linux/pm_runtime.h>
+#include <linux/sched/task_stack.h>
#include <linux/types.h>
#define AM654_HBMC_CALIB_COUNT 25
+struct am654_hbmc_device_priv {
+ struct completion rx_dma_complete;
+ phys_addr_t device_base;
+ struct hyperbus_ctlr *ctlr;
+ struct dma_chan *rx_chan;
+};
+
struct am654_hbmc_priv {
struct hyperbus_ctlr ctlr;
struct hyperbus_device hbdev;
@@ -52,13 +63,103 @@ static int am654_hbmc_calibrate(struct hyperbus_device *hbdev)
return ret;
}
+static void am654_hbmc_dma_callback(void *param)
+{
+ struct am654_hbmc_device_priv *priv = param;
+
+ complete(&priv->rx_dma_complete);
+}
+
+static int am654_hbmc_dma_read(struct am654_hbmc_device_priv *priv, void *to,
+ unsigned long from, ssize_t len)
+
+{
+ enum dma_ctrl_flags flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
+ struct dma_chan *rx_chan = priv->rx_chan;
+ struct dma_async_tx_descriptor *tx;
+ dma_addr_t dma_dst, dma_src;
+ dma_cookie_t cookie;
+ int ret;
+
+ if (!priv->rx_chan || !virt_addr_valid(to) || object_is_on_stack(to))
+ return -EINVAL;
+
+ dma_dst = dma_map_single(rx_chan->device->dev, to, len, DMA_FROM_DEVICE);
+ if (dma_mapping_error(rx_chan->device->dev, dma_dst)) {
+ dev_dbg(priv->ctlr->dev, "DMA mapping failed\n");
+ return -EIO;
+ }
+
+ dma_src = priv->device_base + from;
+ tx = dmaengine_prep_dma_memcpy(rx_chan, dma_dst, dma_src, len, flags);
+ if (!tx) {
+ dev_err(priv->ctlr->dev, "device_prep_dma_memcpy error\n");
+ ret = -EIO;
+ goto unmap_dma;
+ }
+
+ reinit_completion(&priv->rx_dma_complete);
+ tx->callback = am654_hbmc_dma_callback;
+ tx->callback_param = priv;
+ cookie = dmaengine_submit(tx);
+
+ ret = dma_submit_error(cookie);
+ if (ret) {
+ dev_err(priv->ctlr->dev, "dma_submit_error %d\n", cookie);
+ goto unmap_dma;
+ }
+
+ dma_async_issue_pending(rx_chan);
+ if (!wait_for_completion_timeout(&priv->rx_dma_complete, msecs_to_jiffies(len + 1000))) {
+ dmaengine_terminate_sync(rx_chan);
+ dev_err(priv->ctlr->dev, "DMA wait_for_completion_timeout\n");
+ ret = -ETIMEDOUT;
+ }
+
+unmap_dma:
+ dma_unmap_single(rx_chan->device->dev, dma_dst, len, DMA_FROM_DEVICE);
+ return ret;
+}
+
+static void am654_hbmc_read(struct hyperbus_device *hbdev, void *to,
+ unsigned long from, ssize_t len)
+{
+ struct am654_hbmc_device_priv *priv = hbdev->priv;
+
+ if (len < SZ_1K || am654_hbmc_dma_read(priv, to, from, len))
+ memcpy_fromio(to, hbdev->map.virt + from, len);
+}
+
static const struct hyperbus_ops am654_hbmc_ops = {
.calibrate = am654_hbmc_calibrate,
+ .copy_from = am654_hbmc_read,
};
+static int am654_hbmc_request_mmap_dma(struct am654_hbmc_device_priv *priv)
+{
+ struct dma_chan *rx_chan;
+ dma_cap_mask_t mask;
+
+ dma_cap_zero(mask);
+ dma_cap_set(DMA_MEMCPY, mask);
+
+ rx_chan = dma_request_chan_by_mask(&mask);
+ if (IS_ERR(rx_chan)) {
+ if (PTR_ERR(rx_chan) == -EPROBE_DEFER)
+ return -EPROBE_DEFER;
+ dev_dbg(priv->ctlr->dev, "No DMA channel available\n");
+ return 0;
+ }
+ priv->rx_chan = rx_chan;
+ init_completion(&priv->rx_dma_complete);
+
+ return 0;
+}
+
static int am654_hbmc_probe(struct platform_device *pdev)
{
struct device_node *np = pdev->dev.of_node;
+ struct am654_hbmc_device_priv *dev_priv;
struct device *dev = &pdev->dev;
struct am654_hbmc_priv *priv;
struct resource res;
@@ -70,7 +171,8 @@ static int am654_hbmc_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, priv);
- ret = of_address_to_resource(np, 0, &res);
+ priv->hbdev.np = of_get_next_child(np, NULL);
+ ret = of_address_to_resource(priv->hbdev.np, 0, &res);
if (ret)
return ret;
@@ -88,13 +190,6 @@ static int am654_hbmc_probe(struct platform_device *pdev)
priv->mux_ctrl = control;
}
- pm_runtime_enable(dev);
- ret = pm_runtime_get_sync(dev);
- if (ret < 0) {
- pm_runtime_put_noidle(dev);
- goto disable_pm;
- }
-
priv->hbdev.map.size = resource_size(&res);
priv->hbdev.map.virt = devm_ioremap_resource(dev, &res);
if (IS_ERR(priv->hbdev.map.virt))
@@ -103,17 +198,32 @@ static int am654_hbmc_probe(struct platform_device *pdev)
priv->ctlr.dev = dev;
priv->ctlr.ops = &am654_hbmc_ops;
priv->hbdev.ctlr = &priv->ctlr;
- priv->hbdev.np = of_get_next_child(dev->of_node, NULL);
+
+ dev_priv = devm_kzalloc(dev, sizeof(*dev_priv), GFP_KERNEL);
+ if (!dev_priv) {
+ ret = -ENOMEM;
+ goto disable_mux;
+ }
+
+ priv->hbdev.priv = dev_priv;
+ dev_priv->device_base = res.start;
+ dev_priv->ctlr = &priv->ctlr;
+
+ ret = am654_hbmc_request_mmap_dma(dev_priv);
+ if (ret)
+ goto disable_mux;
+
ret = hyperbus_register_device(&priv->hbdev);
if (ret) {
dev_err(dev, "failed to register controller\n");
- pm_runtime_put_sync(&pdev->dev);
- goto disable_pm;
+ goto release_dma;
}
return 0;
-disable_pm:
- pm_runtime_disable(dev);
+release_dma:
+ if (dev_priv->rx_chan)
+ dma_release_channel(dev_priv->rx_chan);
+disable_mux:
if (priv->mux_ctrl)
mux_control_deselect(priv->mux_ctrl);
return ret;
@@ -122,13 +232,15 @@ disable_pm:
static int am654_hbmc_remove(struct platform_device *pdev)
{
struct am654_hbmc_priv *priv = platform_get_drvdata(pdev);
+ struct am654_hbmc_device_priv *dev_priv = priv->hbdev.priv;
int ret;
ret = hyperbus_unregister_device(&priv->hbdev);
if (priv->mux_ctrl)
mux_control_deselect(priv->mux_ctrl);
- pm_runtime_put_sync(&pdev->dev);
- pm_runtime_disable(&pdev->dev);
+
+ if (dev_priv->rx_chan)
+ dma_release_channel(dev_priv->rx_chan);
return ret;
}
diff --git a/drivers/mtd/hyperbus/rpc-if.c b/drivers/mtd/hyperbus/rpc-if.c
new file mode 100644
index 000000000000..ecb050ba95cd
--- /dev/null
+++ b/drivers/mtd/hyperbus/rpc-if.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Linux driver for RPC-IF HyperFlash
+ *
+ * Copyright (C) 2019-2020 Cogent Embedded, Inc.
+ */
+
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mtd/hyperbus.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mux/consumer.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+
+#include <memory/renesas-rpc-if.h>
+
+struct rpcif_hyperbus {
+ struct rpcif rpc;
+ struct hyperbus_ctlr ctlr;
+ struct hyperbus_device hbdev;
+};
+
+static const struct rpcif_op rpcif_op_tmpl = {
+ .cmd = {
+ .buswidth = 8,
+ .ddr = true,
+ },
+ .ocmd = {
+ .buswidth = 8,
+ .ddr = true,
+ },
+ .addr = {
+ .nbytes = 1,
+ .buswidth = 8,
+ .ddr = true,
+ },
+ .data = {
+ .buswidth = 8,
+ .ddr = true,
+ },
+};
+
+static void rpcif_hb_prepare_read(struct rpcif *rpc, void *to,
+ unsigned long from, ssize_t len)
+{
+ struct rpcif_op op = rpcif_op_tmpl;
+
+ op.cmd.opcode = HYPERBUS_RW_READ | HYPERBUS_AS_MEM;
+ op.addr.val = from >> 1;
+ op.dummy.buswidth = 1;
+ op.dummy.ncycles = 15;
+ op.data.dir = RPCIF_DATA_IN;
+ op.data.nbytes = len;
+ op.data.buf.in = to;
+
+ rpcif_prepare(rpc, &op, NULL, NULL);
+}
+
+static void rpcif_hb_prepare_write(struct rpcif *rpc, unsigned long to,
+ void *from, ssize_t len)
+{
+ struct rpcif_op op = rpcif_op_tmpl;
+
+ op.cmd.opcode = HYPERBUS_RW_WRITE | HYPERBUS_AS_MEM;
+ op.addr.val = to >> 1;
+ op.data.dir = RPCIF_DATA_OUT;
+ op.data.nbytes = len;
+ op.data.buf.out = from;
+
+ rpcif_prepare(rpc, &op, NULL, NULL);
+}
+
+static u16 rpcif_hb_read16(struct hyperbus_device *hbdev, unsigned long addr)
+{
+ struct rpcif_hyperbus *hyperbus =
+ container_of(hbdev, struct rpcif_hyperbus, hbdev);
+ map_word data;
+
+ rpcif_hb_prepare_read(&hyperbus->rpc, &data, addr, 2);
+
+ rpcif_manual_xfer(&hyperbus->rpc);
+
+ return data.x[0];
+}
+
+static void rpcif_hb_write16(struct hyperbus_device *hbdev, unsigned long addr,
+ u16 data)
+{
+ struct rpcif_hyperbus *hyperbus =
+ container_of(hbdev, struct rpcif_hyperbus, hbdev);
+
+ rpcif_hb_prepare_write(&hyperbus->rpc, addr, &data, 2);
+
+ rpcif_manual_xfer(&hyperbus->rpc);
+}
+
+static void rpcif_hb_copy_from(struct hyperbus_device *hbdev, void *to,
+ unsigned long from, ssize_t len)
+{
+ struct rpcif_hyperbus *hyperbus =
+ container_of(hbdev, struct rpcif_hyperbus, hbdev);
+
+ rpcif_hb_prepare_read(&hyperbus->rpc, to, from, len);
+
+ rpcif_dirmap_read(&hyperbus->rpc, from, len, to);
+}
+
+static const struct hyperbus_ops rpcif_hb_ops = {
+ .read16 = rpcif_hb_read16,
+ .write16 = rpcif_hb_write16,
+ .copy_from = rpcif_hb_copy_from,
+};
+
+static int rpcif_hb_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct rpcif_hyperbus *hyperbus;
+ int error;
+
+ hyperbus = devm_kzalloc(dev, sizeof(*hyperbus), GFP_KERNEL);
+ if (!hyperbus)
+ return -ENOMEM;
+
+ rpcif_sw_init(&hyperbus->rpc, pdev->dev.parent);
+
+ platform_set_drvdata(pdev, hyperbus);
+
+ rpcif_enable_rpm(&hyperbus->rpc);
+
+ rpcif_hw_init(&hyperbus->rpc, true);
+
+ hyperbus->hbdev.map.size = hyperbus->rpc.size;
+ hyperbus->hbdev.map.virt = hyperbus->rpc.dirmap;
+
+ hyperbus->ctlr.dev = dev;
+ hyperbus->ctlr.ops = &rpcif_hb_ops;
+ hyperbus->hbdev.ctlr = &hyperbus->ctlr;
+ hyperbus->hbdev.np = of_get_next_child(pdev->dev.parent->of_node, NULL);
+ error = hyperbus_register_device(&hyperbus->hbdev);
+ if (error)
+ rpcif_disable_rpm(&hyperbus->rpc);
+
+ return error;
+}
+
+static int rpcif_hb_remove(struct platform_device *pdev)
+{
+ struct rpcif_hyperbus *hyperbus = platform_get_drvdata(pdev);
+ int error = hyperbus_unregister_device(&hyperbus->hbdev);
+ struct rpcif *rpc = dev_get_drvdata(pdev->dev.parent);
+
+ rpcif_disable_rpm(rpc);
+ return error;
+}
+
+static struct platform_driver rpcif_platform_driver = {
+ .probe = rpcif_hb_probe,
+ .remove = rpcif_hb_remove,
+ .driver = {
+ .name = "rpc-if-hyperflash",
+ },
+};
+
+module_platform_driver(rpcif_platform_driver);
+
+MODULE_DESCRIPTION("Renesas RPC-IF HyperFlash driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mtd/lpddr/lpddr2_nvm.c b/drivers/mtd/lpddr/lpddr2_nvm.c
index 0f1547f09d08..72f5c7b30079 100644
--- a/drivers/mtd/lpddr/lpddr2_nvm.c
+++ b/drivers/mtd/lpddr/lpddr2_nvm.c
@@ -393,6 +393,17 @@ static int lpddr2_nvm_lock(struct mtd_info *mtd, loff_t start_add,
return lpddr2_nvm_do_block_op(mtd, start_add, len, LPDDR2_NVM_LOCK);
}
+static const struct mtd_info lpddr2_nvm_mtd_info = {
+ .type = MTD_RAM,
+ .writesize = 1,
+ .flags = (MTD_CAP_NVRAM | MTD_POWERUP_LOCK),
+ ._read = lpddr2_nvm_read,
+ ._write = lpddr2_nvm_write,
+ ._erase = lpddr2_nvm_erase,
+ ._unlock = lpddr2_nvm_unlock,
+ ._lock = lpddr2_nvm_lock,
+};
+
/*
* lpddr2_nvm driver probe method
*/
@@ -433,6 +444,7 @@ static int lpddr2_nvm_probe(struct platform_device *pdev)
.pfow_base = OW_BASE_ADDRESS,
.fldrv_priv = pcm_data,
};
+
if (IS_ERR(map->virt))
return PTR_ERR(map->virt);
@@ -444,22 +456,13 @@ static int lpddr2_nvm_probe(struct platform_device *pdev)
return PTR_ERR(pcm_data->ctl_regs);
/* Populate mtd_info data structure */
- *mtd = (struct mtd_info) {
- .dev = { .parent = &pdev->dev },
- .name = pdev->dev.init_name,
- .type = MTD_RAM,
- .priv = map,
- .size = resource_size(add_range),
- .erasesize = ERASE_BLOCKSIZE * pcm_data->bus_width,
- .writesize = 1,
- .writebufsize = WRITE_BUFFSIZE * pcm_data->bus_width,
- .flags = (MTD_CAP_NVRAM | MTD_POWERUP_LOCK),
- ._read = lpddr2_nvm_read,
- ._write = lpddr2_nvm_write,
- ._erase = lpddr2_nvm_erase,
- ._unlock = lpddr2_nvm_unlock,
- ._lock = lpddr2_nvm_lock,
- };
+ *mtd = lpddr2_nvm_mtd_info;
+ mtd->dev.parent = &pdev->dev;
+ mtd->name = pdev->dev.init_name;
+ mtd->priv = map;
+ mtd->size = resource_size(add_range);
+ mtd->erasesize = ERASE_BLOCKSIZE * pcm_data->bus_width;
+ mtd->writebufsize = WRITE_BUFFSIZE * pcm_data->bus_width;
/* Verify the presence of the device looking for PFOW string */
if (!lpddr2_nvm_pfow_present(map)) {
diff --git a/drivers/mtd/lpddr/lpddr_cmds.c b/drivers/mtd/lpddr/lpddr_cmds.c
index fb1cbc9a2870..ee063baed136 100644
--- a/drivers/mtd/lpddr/lpddr_cmds.c
+++ b/drivers/mtd/lpddr/lpddr_cmds.c
@@ -94,6 +94,34 @@ struct mtd_info *lpddr_cmdset(struct map_info *map)
}
EXPORT_SYMBOL(lpddr_cmdset);
+static void print_drs_error(unsigned int dsr)
+{
+ int prog_status = (dsr & DSR_RPS) >> 8;
+
+ if (!(dsr & DSR_AVAILABLE))
+ pr_notice("DSR.15: (0) Device not Available\n");
+ if ((prog_status & 0x03) == 0x03)
+ pr_notice("DSR.9,8: (11) Attempt to program invalid half with 41h command\n");
+ else if (prog_status & 0x02)
+ pr_notice("DSR.9,8: (10) Object Mode Program attempt in region with Control Mode data\n");
+ else if (prog_status & 0x01)
+ pr_notice("DSR.9,8: (01) Program attempt in region with Object Mode data\n");
+ if (!(dsr & DSR_READY_STATUS))
+ pr_notice("DSR.7: (0) Device is Busy\n");
+ if (dsr & DSR_ESS)
+ pr_notice("DSR.6: (1) Erase Suspended\n");
+ if (dsr & DSR_ERASE_STATUS)
+ pr_notice("DSR.5: (1) Erase/Blank check error\n");
+ if (dsr & DSR_PROGRAM_STATUS)
+ pr_notice("DSR.4: (1) Program Error\n");
+ if (dsr & DSR_VPPS)
+ pr_notice("DSR.3: (1) Vpp low detect, operation aborted\n");
+ if (dsr & DSR_PSS)
+ pr_notice("DSR.2: (1) Program suspended\n");
+ if (dsr & DSR_DPS)
+ pr_notice("DSR.1: (1) Aborted Erase/Program attempt on locked block\n");
+}
+
static int wait_for_ready(struct map_info *map, struct flchip *chip,
unsigned int chip_op_time)
{
diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index fd37553f1b07..6650acbc961e 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -75,6 +75,17 @@ config MTD_PHYSMAP_OF
physically into the CPU's memory. The mapping description here is
taken from OF device tree.
+config MTD_PHYSMAP_BT1_ROM
+ bool "Baikal-T1 Boot ROMs OF-based physical memory map handling"
+ depends on MTD_PHYSMAP_OF
+ depends on MIPS_BAIKAL_T1 || COMPILE_TEST
+ select MTD_COMPLEX_MAPPINGS
+ select MULTIPLEXER
+ select MUX_MMIO
+ help
+ This provides some extra DT physmap parsing for the Baikal-T1
+ platforms, some detection and setting up ROMs-specific accessors.
+
config MTD_PHYSMAP_VERSATILE
bool "ARM Versatile OF-based physical memory map handling"
depends on MTD_PHYSMAP_OF
diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile
index c0da86a5d26f..79f018cf412f 100644
--- a/drivers/mtd/maps/Makefile
+++ b/drivers/mtd/maps/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_MTD_CK804XROM) += ck804xrom.o
obj-$(CONFIG_MTD_TSUNAMI) += tsunami_flash.o
obj-$(CONFIG_MTD_PXA2XX) += pxa2xx-flash.o
physmap-objs-y += physmap-core.o
+physmap-objs-$(CONFIG_MTD_PHYSMAP_BT1_ROM) += physmap-bt1-rom.o
physmap-objs-$(CONFIG_MTD_PHYSMAP_VERSATILE) += physmap-versatile.o
physmap-objs-$(CONFIG_MTD_PHYSMAP_GEMINI) += physmap-gemini.o
physmap-objs-$(CONFIG_MTD_PHYSMAP_IXP4XX) += physmap-ixp4xx.o
diff --git a/drivers/mtd/maps/physmap-bt1-rom.c b/drivers/mtd/maps/physmap-bt1-rom.c
new file mode 100644
index 000000000000..27cfe1c63a2e
--- /dev/null
+++ b/drivers/mtd/maps/physmap-bt1-rom.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 BAIKAL ELECTRONICS, JSC
+ *
+ * Authors:
+ * Serge Semin <Sergey.Semin@baikalelectronics.ru>
+ *
+ * Baikal-T1 Physically Mapped Internal ROM driver
+ */
+#include <linux/bits.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/mtd/map.h>
+#include <linux/mtd/xip.h>
+#include <linux/mux/consumer.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include "physmap-bt1-rom.h"
+
+/*
+ * Baikal-T1 SoC ROMs are only accessible by the dword-aligned instructions.
+ * We have to take this into account when implementing the data read-methods.
+ * Note there is no need in bothering with endianness, since both Baikal-T1
+ * CPU and MMIO are LE.
+ */
+static map_word __xipram bt1_rom_map_read(struct map_info *map,
+ unsigned long ofs)
+{
+ void __iomem *src = map->virt + ofs;
+ unsigned long shift;
+ map_word ret;
+ u32 data;
+
+ /* Read data within offset dword. */
+ shift = (unsigned long)src & 0x3;
+ data = readl_relaxed(src - shift);
+ if (!shift) {
+ ret.x[0] = data;
+ return ret;
+ }
+ ret.x[0] = data >> (shift * BITS_PER_BYTE);
+
+ /* Read data from the next dword. */
+ shift = 4 - shift;
+ if (ofs + shift >= map->size)
+ return ret;
+
+ data = readl_relaxed(src + shift);
+ ret.x[0] |= data << (shift * BITS_PER_BYTE);
+
+ return ret;
+}
+
+static void __xipram bt1_rom_map_copy_from(struct map_info *map,
+ void *to, unsigned long from,
+ ssize_t len)
+{
+ void __iomem *src = map->virt + from;
+ ssize_t shift, chunk;
+ u32 data;
+
+ if (len <= 0 || from >= map->size)
+ return;
+
+ /* Make sure we don't go over the map limit. */
+ len = min_t(ssize_t, map->size - from, len);
+
+ /*
+ * Since requested data size can be pretty big we have to implement
+ * the copy procedure as optimal as possible. That's why it's split
+ * up into the next three stages: unaligned head, aligned body,
+ * unaligned tail.
+ */
+ shift = (ssize_t)src & 0x3;
+ if (shift) {
+ chunk = min_t(ssize_t, 4 - shift, len);
+ data = readl_relaxed(src - shift);
+ memcpy(to, &data + shift, chunk);
+ src += chunk;
+ to += chunk;
+ len -= chunk;
+ }
+
+ while (len >= 4) {
+ data = readl_relaxed(src);
+ memcpy(to, &data, 4);
+ src += 4;
+ to += 4;
+ len -= 4;
+ }
+
+ if (len) {
+ data = readl_relaxed(src);
+ memcpy(to, &data, len);
+ }
+}
+
+int of_flash_probe_bt1_rom(struct platform_device *pdev,
+ struct device_node *np,
+ struct map_info *map)
+{
+ struct device *dev = &pdev->dev;
+
+ /* It's supposed to be read-only MTD. */
+ if (!of_device_is_compatible(np, "mtd-rom")) {
+ dev_info(dev, "No mtd-rom compatible string\n");
+ return 0;
+ }
+
+ /* Multiplatform guard. */
+ if (!of_device_is_compatible(np, "baikal,bt1-int-rom"))
+ return 0;
+
+ /* Sanity check the device parameters retrieved from DTB. */
+ if (map->bankwidth != 4)
+ dev_warn(dev, "Bank width is supposed to be 32 bits wide\n");
+
+ map->read = bt1_rom_map_read;
+ map->copy_from = bt1_rom_map_copy_from;
+
+ return 0;
+}
diff --git a/drivers/mtd/maps/physmap-bt1-rom.h b/drivers/mtd/maps/physmap-bt1-rom.h
new file mode 100644
index 000000000000..6782899598a4
--- /dev/null
+++ b/drivers/mtd/maps/physmap-bt1-rom.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <linux/mtd/map.h>
+#include <linux/of.h>
+
+#ifdef CONFIG_MTD_PHYSMAP_BT1_ROM
+int of_flash_probe_bt1_rom(struct platform_device *pdev,
+ struct device_node *np,
+ struct map_info *map);
+#else
+static inline
+int of_flash_probe_bt1_rom(struct platform_device *pdev,
+ struct device_node *np,
+ struct map_info *map)
+{
+ return 0;
+}
+#endif
diff --git a/drivers/mtd/maps/physmap-core.c b/drivers/mtd/maps/physmap-core.c
index 8f7f966fa9a7..001ed5deb622 100644
--- a/drivers/mtd/maps/physmap-core.c
+++ b/drivers/mtd/maps/physmap-core.c
@@ -41,6 +41,7 @@
#include <linux/pm_runtime.h>
#include <linux/gpio/consumer.h>
+#include "physmap-bt1-rom.h"
#include "physmap-gemini.h"
#include "physmap-ixp4xx.h"
#include "physmap-versatile.h"
@@ -371,6 +372,10 @@ static int physmap_flash_of_init(struct platform_device *dev)
info->maps[i].bankwidth = bankwidth;
info->maps[i].device_node = dp;
+ err = of_flash_probe_bt1_rom(dev, dp, &info->maps[i]);
+ if (err)
+ return err;
+
err = of_flash_probe_gemini(dev, dp, &info->maps[i]);
if (err)
return err;
@@ -515,7 +520,8 @@ static int physmap_flash_probe(struct platform_device *dev)
dev_notice(&dev->dev, "physmap platform flash device: %pR\n",
res);
- info->maps[i].name = dev_name(&dev->dev);
+ if (!info->maps[i].name)
+ info->maps[i].name = dev_name(&dev->dev);
if (!info->maps[i].phys)
info->maps[i].phys = res->start;
diff --git a/drivers/mtd/maps/vmu-flash.c b/drivers/mtd/maps/vmu-flash.c
index 177bf134e189..a7ec947a3ebb 100644
--- a/drivers/mtd/maps/vmu-flash.c
+++ b/drivers/mtd/maps/vmu-flash.c
@@ -40,7 +40,7 @@ struct memcard {
u32 blocklen;
u32 writecnt;
u32 readcnt;
- u32 removeable;
+ u32 removable;
int partition;
int read;
unsigned char *blockread;
@@ -619,7 +619,7 @@ static int vmu_connect(struct maple_device *mdev)
card->blocklen = ((basic_flash_data >> 16 & 0xFF) + 1) << 5;
card->writecnt = basic_flash_data >> 12 & 0xF;
card->readcnt = basic_flash_data >> 8 & 0xF;
- card->removeable = basic_flash_data >> 7 & 1;
+ card->removable = basic_flash_data >> 7 & 1;
card->partition = 0;
@@ -772,7 +772,6 @@ static void vmu_file_error(struct maple_device *mdev, void *recvbuf)
static int probe_maple_vmu(struct device *dev)
{
- int error;
struct maple_device *mdev = to_maple_dev(dev);
struct maple_driver *mdrv = to_maple_driver(dev->driver);
@@ -780,11 +779,7 @@ static int probe_maple_vmu(struct device *dev)
mdev->fileerr_handler = vmu_file_error;
mdev->driver = mdrv;
- error = vmu_connect(mdev);
- if (error)
- return error;
-
- return 0;
+ return vmu_connect(mdev);
}
static int remove_maple_vmu(struct device *dev)
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 1d6c9e7e7b7d..6e4d0017c0bd 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -103,6 +103,47 @@ concat_read(struct mtd_info *mtd, loff_t from, size_t len,
}
static int
+concat_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
+ size_t * retlen, const u_char * buf)
+{
+ struct mtd_concat *concat = CONCAT(mtd);
+ int err = -EINVAL;
+ int i;
+ for (i = 0; i < concat->num_subdev; i++) {
+ struct mtd_info *subdev = concat->subdev[i];
+ size_t size, retsize;
+
+ if (to >= subdev->size) {
+ to -= subdev->size;
+ continue;
+ }
+ if (to + len > subdev->size)
+ size = subdev->size - to;
+ else
+ size = len;
+
+ err = mtd_panic_write(subdev, to, size, &retsize, buf);
+ if (err == -EOPNOTSUPP) {
+ printk(KERN_ERR "mtdconcat: Cannot write from panic without panic_write\n");
+ return err;
+ }
+ if (err)
+ break;
+
+ *retlen += retsize;
+ len -= size;
+ if (len == 0)
+ break;
+
+ err = -EINVAL;
+ buf += size;
+ to = 0;
+ }
+ return err;
+}
+
+
+static int
concat_write(struct mtd_info *mtd, loff_t to, size_t len,
size_t * retlen, const u_char * buf)
{
@@ -648,6 +689,8 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[], /* subdevices to c
concat->mtd._block_isbad = concat_block_isbad;
if (subdev[0]->_block_markbad)
concat->mtd._block_markbad = concat_block_markbad;
+ if (subdev[0]->_panic_write)
+ concat->mtd._panic_write = concat_panic_write;
concat->mtd.ecc_stats.badblocks = subdev[0]->ecc_stats.badblocks;
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index b5e5d3140f57..e9e163ae9d86 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -335,7 +335,7 @@ static const struct device_type mtd_devtype = {
.release = mtd_release,
};
-static int mtd_partid_show(struct seq_file *s, void *p)
+static int mtd_partid_debug_show(struct seq_file *s, void *p)
{
struct mtd_info *mtd = s->private;
@@ -344,19 +344,9 @@ static int mtd_partid_show(struct seq_file *s, void *p)
return 0;
}
-static int mtd_partid_debugfs_open(struct inode *inode, struct file *file)
-{
- return single_open(file, mtd_partid_show, inode->i_private);
-}
-
-static const struct file_operations mtd_partid_debug_fops = {
- .open = mtd_partid_debugfs_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(mtd_partid_debug);
-static int mtd_partname_show(struct seq_file *s, void *p)
+static int mtd_partname_debug_show(struct seq_file *s, void *p)
{
struct mtd_info *mtd = s->private;
@@ -365,17 +355,7 @@ static int mtd_partname_show(struct seq_file *s, void *p)
return 0;
}
-static int mtd_partname_debugfs_open(struct inode *inode, struct file *file)
-{
- return single_open(file, mtd_partname_show, inode->i_private);
-}
-
-static const struct file_operations mtd_partname_debug_fops = {
- .open = mtd_partname_debugfs_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(mtd_partname_debug);
static struct dentry *dfs_dir_mtd;
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index 4ced68be7ed7..774970bfcf85 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -279,12 +279,13 @@ static void mtdoops_do_dump(struct kmsg_dumper *dumper,
kmsg_dump_get_buffer(dumper, true, cxt->oops_buf + MTDOOPS_HEADER_SIZE,
record_size - MTDOOPS_HEADER_SIZE, NULL);
- /* Panics must be written immediately */
- if (reason != KMSG_DUMP_OOPS)
+ if (reason != KMSG_DUMP_OOPS) {
+ /* Panics must be written immediately */
mtdoops_write(cxt, 1);
-
- /* For other cases, schedule work to write it "nicely" */
- schedule_work(&cxt->work_write);
+ } else {
+ /* For other cases, schedule work to write it "nicely" */
+ schedule_work(&cxt->work_write);
+ }
}
static void mtdoops_notify_add(struct mtd_info *mtd)
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index c1a45b071165..4a9aed4f0104 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -9,4 +9,12 @@ source "drivers/mtd/nand/onenand/Kconfig"
source "drivers/mtd/nand/raw/Kconfig"
source "drivers/mtd/nand/spi/Kconfig"
+menu "ECC engine support"
+
+config MTD_NAND_ECC
+ bool
+ depends on MTD_NAND_CORE
+
+endmenu
+
endmenu
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index 7ecd80c0a66e..981372953b56 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -6,3 +6,5 @@ obj-$(CONFIG_MTD_NAND_CORE) += nandcore.o
obj-y += onenand/
obj-y += raw/
obj-y += spi/
+
+nandcore-$(CONFIG_MTD_NAND_ECC) += ecc.o
diff --git a/drivers/mtd/nand/ecc.c b/drivers/mtd/nand/ecc.c
new file mode 100644
index 000000000000..4a56e6c0da67
--- /dev/null
+++ b/drivers/mtd/nand/ecc.c
@@ -0,0 +1,484 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Generic Error-Correcting Code (ECC) engine
+ *
+ * Copyright (C) 2019 Macronix
+ * Author:
+ * Miquèl RAYNAL <miquel.raynal@bootlin.com>
+ *
+ *
+ * This file describes the abstraction of any NAND ECC engine. It has been
+ * designed to fit most cases, including parallel NANDs and SPI-NANDs.
+ *
+ * There are three main situations where instantiating this ECC engine makes
+ * sense:
+ * - external: The ECC engine is outside the NAND pipeline, typically this
+ * is a software ECC engine, or an hardware engine that is
+ * outside the NAND controller pipeline.
+ * - pipelined: The ECC engine is inside the NAND pipeline, ie. on the
+ * controller's side. This is the case of most of the raw NAND
+ * controllers. In the pipeline case, the ECC bytes are
+ * generated/data corrected on the fly when a page is
+ * written/read.
+ * - ondie: The ECC engine is inside the NAND pipeline, on the chip's side.
+ * Some NAND chips can correct themselves the data.
+ *
+ * Besides the initial setup and final cleanups, the interfaces are rather
+ * simple:
+ * - prepare: Prepare an I/O request. Enable/disable the ECC engine based on
+ * the I/O request type. In case of software correction or external
+ * engine, this step may involve to derive the ECC bytes and place
+ * them in the OOB area before a write.
+ * - finish: Finish an I/O request. Correct the data in case of a read
+ * request and report the number of corrected bits/uncorrectable
+ * errors. Most likely empty for write operations, unless you have
+ * hardware specific stuff to do, like shutting down the engine to
+ * save power.
+ *
+ * The I/O request should be enclosed in a prepare()/finish() pair of calls
+ * and will behave differently depending on the requested I/O type:
+ * - raw: Correction disabled
+ * - ecc: Correction enabled
+ *
+ * The request direction is impacting the logic as well:
+ * - read: Load data from the NAND chip
+ * - write: Store data in the NAND chip
+ *
+ * Mixing all this combinations together gives the following behavior.
+ * Those are just examples, drivers are free to add custom steps in their
+ * prepare/finish hook.
+ *
+ * [external ECC engine]
+ * - external + prepare + raw + read: do nothing
+ * - external + finish + raw + read: do nothing
+ * - external + prepare + raw + write: do nothing
+ * - external + finish + raw + write: do nothing
+ * - external + prepare + ecc + read: do nothing
+ * - external + finish + ecc + read: calculate expected ECC bytes, extract
+ * ECC bytes from OOB buffer, correct
+ * and report any bitflip/error
+ * - external + prepare + ecc + write: calculate ECC bytes and store them at
+ * the right place in the OOB buffer based
+ * on the OOB layout
+ * - external + finish + ecc + write: do nothing
+ *
+ * [pipelined ECC engine]
+ * - pipelined + prepare + raw + read: disable the controller's ECC engine if
+ * activated
+ * - pipelined + finish + raw + read: do nothing
+ * - pipelined + prepare + raw + write: disable the controller's ECC engine if
+ * activated
+ * - pipelined + finish + raw + write: do nothing
+ * - pipelined + prepare + ecc + read: enable the controller's ECC engine if
+ * deactivated
+ * - pipelined + finish + ecc + read: check the status, report any
+ * error/bitflip
+ * - pipelined + prepare + ecc + write: enable the controller's ECC engine if
+ * deactivated
+ * - pipelined + finish + ecc + write: do nothing
+ *
+ * [ondie ECC engine]
+ * - ondie + prepare + raw + read: send commands to disable the on-chip ECC
+ * engine if activated
+ * - ondie + finish + raw + read: do nothing
+ * - ondie + prepare + raw + write: send commands to disable the on-chip ECC
+ * engine if activated
+ * - ondie + finish + raw + write: do nothing
+ * - ondie + prepare + ecc + read: send commands to enable the on-chip ECC
+ * engine if deactivated
+ * - ondie + finish + ecc + read: send commands to check the status, report
+ * any error/bitflip
+ * - ondie + prepare + ecc + write: send commands to enable the on-chip ECC
+ * engine if deactivated
+ * - ondie + finish + ecc + write: do nothing
+ */
+
+#include <linux/module.h>
+#include <linux/mtd/nand.h>
+
+/**
+ * nand_ecc_init_ctx - Init the ECC engine context
+ * @nand: the NAND device
+ *
+ * On success, the caller is responsible of calling @nand_ecc_cleanup_ctx().
+ */
+int nand_ecc_init_ctx(struct nand_device *nand)
+{
+ if (!nand->ecc.engine->ops->init_ctx)
+ return 0;
+
+ return nand->ecc.engine->ops->init_ctx(nand);
+}
+EXPORT_SYMBOL(nand_ecc_init_ctx);
+
+/**
+ * nand_ecc_cleanup_ctx - Cleanup the ECC engine context
+ * @nand: the NAND device
+ */
+void nand_ecc_cleanup_ctx(struct nand_device *nand)
+{
+ if (nand->ecc.engine->ops->cleanup_ctx)
+ nand->ecc.engine->ops->cleanup_ctx(nand);
+}
+EXPORT_SYMBOL(nand_ecc_cleanup_ctx);
+
+/**
+ * nand_ecc_prepare_io_req - Prepare an I/O request
+ * @nand: the NAND device
+ * @req: the I/O request
+ */
+int nand_ecc_prepare_io_req(struct nand_device *nand,
+ struct nand_page_io_req *req)
+{
+ if (!nand->ecc.engine->ops->prepare_io_req)
+ return 0;
+
+ return nand->ecc.engine->ops->prepare_io_req(nand, req);
+}
+EXPORT_SYMBOL(nand_ecc_prepare_io_req);
+
+/**
+ * nand_ecc_finish_io_req - Finish an I/O request
+ * @nand: the NAND device
+ * @req: the I/O request
+ */
+int nand_ecc_finish_io_req(struct nand_device *nand,
+ struct nand_page_io_req *req)
+{
+ if (!nand->ecc.engine->ops->finish_io_req)
+ return 0;
+
+ return nand->ecc.engine->ops->finish_io_req(nand, req);
+}
+EXPORT_SYMBOL(nand_ecc_finish_io_req);
+
+/* Define default OOB placement schemes for large and small page devices */
+static int nand_ooblayout_ecc_sp(struct mtd_info *mtd, int section,
+ struct mtd_oob_region *oobregion)
+{
+ struct nand_device *nand = mtd_to_nanddev(mtd);
+ unsigned int total_ecc_bytes = nand->ecc.ctx.total;
+
+ if (section > 1)
+ return -ERANGE;
+
+ if (!section) {
+ oobregion->offset = 0;
+ if (mtd->oobsize == 16)
+ oobregion->length = 4;
+ else
+ oobregion->length = 3;
+ } else {
+ if (mtd->oobsize == 8)
+ return -ERANGE;
+
+ oobregion->offset = 6;
+ oobregion->length = total_ecc_bytes - 4;
+ }
+
+ return 0;
+}
+
+static int nand_ooblayout_free_sp(struct mtd_info *mtd, int section,
+ struct mtd_oob_region *oobregion)
+{
+ if (section > 1)
+ return -ERANGE;
+
+ if (mtd->oobsize == 16) {
+ if (section)
+ return -ERANGE;
+
+ oobregion->length = 8;
+ oobregion->offset = 8;
+ } else {
+ oobregion->length = 2;
+ if (!section)
+ oobregion->offset = 3;
+ else
+ oobregion->offset = 6;
+ }
+
+ return 0;
+}
+
+static const struct mtd_ooblayout_ops nand_ooblayout_sp_ops = {
+ .ecc = nand_ooblayout_ecc_sp,
+ .free = nand_ooblayout_free_sp,
+};
+
+const struct mtd_ooblayout_ops *nand_get_small_page_ooblayout(void)
+{
+ return &nand_ooblayout_sp_ops;
+}
+EXPORT_SYMBOL_GPL(nand_get_small_page_ooblayout);
+
+static int nand_ooblayout_ecc_lp(struct mtd_info *mtd, int section,
+ struct mtd_oob_region *oobregion)
+{
+ struct nand_device *nand = mtd_to_nanddev(mtd);
+ unsigned int total_ecc_bytes = nand->ecc.ctx.total;
+
+ if (section || !total_ecc_bytes)
+ return -ERANGE;
+
+ oobregion->length = total_ecc_bytes;
+ oobregion->offset = mtd->oobsize - oobregion->length;
+
+ return 0;
+}
+
+static int nand_ooblayout_free_lp(struct mtd_info *mtd, int section,
+ struct mtd_oob_region *oobregion)
+{
+ struct nand_device *nand = mtd_to_nanddev(mtd);
+ unsigned int total_ecc_bytes = nand->ecc.ctx.total;
+
+ if (section)
+ return -ERANGE;
+
+ oobregion->length = mtd->oobsize - total_ecc_bytes - 2;
+ oobregion->offset = 2;
+
+ return 0;
+}
+
+static const struct mtd_ooblayout_ops nand_ooblayout_lp_ops = {
+ .ecc = nand_ooblayout_ecc_lp,
+ .free = nand_ooblayout_free_lp,
+};
+
+const struct mtd_ooblayout_ops *nand_get_large_page_ooblayout(void)
+{
+ return &nand_ooblayout_lp_ops;
+}
+EXPORT_SYMBOL_GPL(nand_get_large_page_ooblayout);
+
+/*
+ * Support the old "large page" layout used for 1-bit Hamming ECC where ECC
+ * are placed at a fixed offset.
+ */
+static int nand_ooblayout_ecc_lp_hamming(struct mtd_info *mtd, int section,
+ struct mtd_oob_region *oobregion)
+{
+ struct nand_device *nand = mtd_to_nanddev(mtd);
+ unsigned int total_ecc_bytes = nand->ecc.ctx.total;
+
+ if (section)
+ return -ERANGE;
+
+ switch (mtd->oobsize) {
+ case 64:
+ oobregion->offset = 40;
+ break;
+ case 128:
+ oobregion->offset = 80;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ oobregion->length = total_ecc_bytes;
+ if (oobregion->offset + oobregion->length > mtd->oobsize)
+ return -ERANGE;
+
+ return 0;
+}
+
+static int nand_ooblayout_free_lp_hamming(struct mtd_info *mtd, int section,
+ struct mtd_oob_region *oobregion)
+{
+ struct nand_device *nand = mtd_to_nanddev(mtd);
+ unsigned int total_ecc_bytes = nand->ecc.ctx.total;
+ int ecc_offset = 0;
+
+ if (section < 0 || section > 1)
+ return -ERANGE;
+
+ switch (mtd->oobsize) {
+ case 64:
+ ecc_offset = 40;
+ break;
+ case 128:
+ ecc_offset = 80;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (section == 0) {
+ oobregion->offset = 2;
+ oobregion->length = ecc_offset - 2;
+ } else {
+ oobregion->offset = ecc_offset + total_ecc_bytes;
+ oobregion->length = mtd->oobsize - oobregion->offset;
+ }
+
+ return 0;
+}
+
+static const struct mtd_ooblayout_ops nand_ooblayout_lp_hamming_ops = {
+ .ecc = nand_ooblayout_ecc_lp_hamming,
+ .free = nand_ooblayout_free_lp_hamming,
+};
+
+const struct mtd_ooblayout_ops *nand_get_large_page_hamming_ooblayout(void)
+{
+ return &nand_ooblayout_lp_hamming_ops;
+}
+EXPORT_SYMBOL_GPL(nand_get_large_page_hamming_ooblayout);
+
+static enum nand_ecc_engine_type
+of_get_nand_ecc_engine_type(struct device_node *np)
+{
+ struct device_node *eng_np;
+
+ if (of_property_read_bool(np, "nand-no-ecc-engine"))
+ return NAND_ECC_ENGINE_TYPE_NONE;
+
+ if (of_property_read_bool(np, "nand-use-soft-ecc-engine"))
+ return NAND_ECC_ENGINE_TYPE_SOFT;
+
+ eng_np = of_parse_phandle(np, "nand-ecc-engine", 0);
+ of_node_put(eng_np);
+
+ if (eng_np) {
+ if (eng_np == np)
+ return NAND_ECC_ENGINE_TYPE_ON_DIE;
+ else
+ return NAND_ECC_ENGINE_TYPE_ON_HOST;
+ }
+
+ return NAND_ECC_ENGINE_TYPE_INVALID;
+}
+
+static const char * const nand_ecc_placement[] = {
+ [NAND_ECC_PLACEMENT_OOB] = "oob",
+ [NAND_ECC_PLACEMENT_INTERLEAVED] = "interleaved",
+};
+
+static enum nand_ecc_placement of_get_nand_ecc_placement(struct device_node *np)
+{
+ enum nand_ecc_placement placement;
+ const char *pm;
+ int err;
+
+ err = of_property_read_string(np, "nand-ecc-placement", &pm);
+ if (!err) {
+ for (placement = NAND_ECC_PLACEMENT_OOB;
+ placement < ARRAY_SIZE(nand_ecc_placement); placement++) {
+ if (!strcasecmp(pm, nand_ecc_placement[placement]))
+ return placement;
+ }
+ }
+
+ return NAND_ECC_PLACEMENT_UNKNOWN;
+}
+
+static const char * const nand_ecc_algos[] = {
+ [NAND_ECC_ALGO_HAMMING] = "hamming",
+ [NAND_ECC_ALGO_BCH] = "bch",
+ [NAND_ECC_ALGO_RS] = "rs",
+};
+
+static enum nand_ecc_algo of_get_nand_ecc_algo(struct device_node *np)
+{
+ enum nand_ecc_algo ecc_algo;
+ const char *pm;
+ int err;
+
+ err = of_property_read_string(np, "nand-ecc-algo", &pm);
+ if (!err) {
+ for (ecc_algo = NAND_ECC_ALGO_HAMMING;
+ ecc_algo < ARRAY_SIZE(nand_ecc_algos);
+ ecc_algo++) {
+ if (!strcasecmp(pm, nand_ecc_algos[ecc_algo]))
+ return ecc_algo;
+ }
+ }
+
+ return NAND_ECC_ALGO_UNKNOWN;
+}
+
+static int of_get_nand_ecc_step_size(struct device_node *np)
+{
+ int ret;
+ u32 val;
+
+ ret = of_property_read_u32(np, "nand-ecc-step-size", &val);
+ return ret ? ret : val;
+}
+
+static int of_get_nand_ecc_strength(struct device_node *np)
+{
+ int ret;
+ u32 val;
+
+ ret = of_property_read_u32(np, "nand-ecc-strength", &val);
+ return ret ? ret : val;
+}
+
+void of_get_nand_ecc_user_config(struct nand_device *nand)
+{
+ struct device_node *dn = nanddev_get_of_node(nand);
+ int strength, size;
+
+ nand->ecc.user_conf.engine_type = of_get_nand_ecc_engine_type(dn);
+ nand->ecc.user_conf.algo = of_get_nand_ecc_algo(dn);
+ nand->ecc.user_conf.placement = of_get_nand_ecc_placement(dn);
+
+ strength = of_get_nand_ecc_strength(dn);
+ if (strength >= 0)
+ nand->ecc.user_conf.strength = strength;
+
+ size = of_get_nand_ecc_step_size(dn);
+ if (size >= 0)
+ nand->ecc.user_conf.step_size = size;
+
+ if (of_property_read_bool(dn, "nand-ecc-maximize"))
+ nand->ecc.user_conf.flags |= NAND_ECC_MAXIMIZE_STRENGTH;
+}
+EXPORT_SYMBOL(of_get_nand_ecc_user_config);
+
+/**
+ * nand_ecc_is_strong_enough - Check if the chip configuration meets the
+ * datasheet requirements.
+ *
+ * @nand: Device to check
+ *
+ * If our configuration corrects A bits per B bytes and the minimum
+ * required correction level is X bits per Y bytes, then we must ensure
+ * both of the following are true:
+ *
+ * (1) A / B >= X / Y
+ * (2) A >= X
+ *
+ * Requirement (1) ensures we can correct for the required bitflip density.
+ * Requirement (2) ensures we can correct even when all bitflips are clumped
+ * in the same sector.
+ */
+bool nand_ecc_is_strong_enough(struct nand_device *nand)
+{
+ const struct nand_ecc_props *reqs = nanddev_get_ecc_requirements(nand);
+ const struct nand_ecc_props *conf = nanddev_get_ecc_conf(nand);
+ struct mtd_info *mtd = nanddev_to_mtd(nand);
+ int corr, ds_corr;
+
+ if (conf->step_size == 0 || reqs->step_size == 0)
+ /* Not enough information */
+ return true;
+
+ /*
+ * We get the number of corrected bits per page to compare
+ * the correction density.
+ */
+ corr = (mtd->writesize * conf->strength) / conf->step_size;
+ ds_corr = (mtd->writesize * reqs->strength) / reqs->step_size;
+
+ return corr >= ds_corr && conf->strength >= reqs->strength;
+}
+EXPORT_SYMBOL(nand_ecc_is_strong_enough);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Miquel Raynal <miquel.raynal@bootlin.com>");
+MODULE_DESCRIPTION("Generic ECC engine");
diff --git a/drivers/mtd/nand/onenand/onenand_base.c b/drivers/mtd/nand/onenand/onenand_base.c
index ec18ade33262..188b8061e1f7 100644
--- a/drivers/mtd/nand/onenand/onenand_base.c
+++ b/drivers/mtd/nand/onenand/onenand_base.c
@@ -1052,16 +1052,11 @@ static int onenand_transfer_auto_oob(struct mtd_info *mtd, uint8_t *buf, int col
int thislen)
{
struct onenand_chip *this = mtd->priv;
- int ret;
this->read_bufferram(mtd, ONENAND_SPARERAM, this->oob_buf, 0,
mtd->oobsize);
- ret = mtd_ooblayout_get_databytes(mtd, buf, this->oob_buf,
- column, thislen);
- if (ret)
- return ret;
-
- return 0;
+ return mtd_ooblayout_get_databytes(mtd, buf, this->oob_buf,
+ column, thislen);
}
/**
diff --git a/drivers/mtd/nand/onenand/onenand_omap2.c b/drivers/mtd/nand/onenand/onenand_omap2.c
index aa9368bf7a0c..d8c0bd002c2b 100644
--- a/drivers/mtd/nand/onenand/onenand_omap2.c
+++ b/drivers/mtd/nand/onenand/onenand_omap2.c
@@ -494,11 +494,8 @@ static int omap2_onenand_probe(struct platform_device *pdev)
c->int_gpiod = devm_gpiod_get_optional(dev, "int", GPIOD_IN);
if (IS_ERR(c->int_gpiod)) {
- r = PTR_ERR(c->int_gpiod);
/* Just try again if this happens */
- if (r != -EPROBE_DEFER)
- dev_err(dev, "error getting gpio: %d\n", r);
- return r;
+ return dev_err_probe(dev, PTR_ERR(c->int_gpiod), "error getting gpio\n");
}
if (c->int_gpiod) {
diff --git a/drivers/mtd/nand/raw/Kconfig b/drivers/mtd/nand/raw/Kconfig
index 1203775023ad..6c46f25b57e2 100644
--- a/drivers/mtd/nand/raw/Kconfig
+++ b/drivers/mtd/nand/raw/Kconfig
@@ -13,6 +13,7 @@ config MTD_NAND_ECC_SW_HAMMING_SMC
menuconfig MTD_RAW_NAND
tristate "Raw/Parallel NAND Device Support"
select MTD_NAND_CORE
+ select MTD_NAND_ECC
select MTD_NAND_ECC_SW_HAMMING
help
This enables support for accessing all type of raw/parallel
diff --git a/drivers/mtd/nand/raw/ams-delta.c b/drivers/mtd/nand/raw/ams-delta.c
index fdba155416d2..d3c5cc513c8f 100644
--- a/drivers/mtd/nand/raw/ams-delta.c
+++ b/drivers/mtd/nand/raw/ams-delta.c
@@ -260,8 +260,8 @@ static int gpio_nand_probe(struct platform_device *pdev)
return err;
}
- this->ecc.mode = NAND_ECC_SOFT;
- this->ecc.algo = NAND_ECC_HAMMING;
+ this->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+ this->ecc.algo = NAND_ECC_ALGO_HAMMING;
platform_set_drvdata(pdev, priv);
@@ -400,12 +400,14 @@ static int gpio_nand_remove(struct platform_device *pdev)
return 0;
}
+#ifdef CONFIG_OF
static const struct of_device_id gpio_nand_of_id_table[] = {
{
/* sentinel */
},
};
MODULE_DEVICE_TABLE(of, gpio_nand_of_id_table);
+#endif
static const struct platform_device_id gpio_nand_plat_id_table[] = {
{
diff --git a/drivers/mtd/nand/raw/arasan-nand-controller.c b/drivers/mtd/nand/raw/arasan-nand-controller.c
index 12c643e97c85..fbb4ea751be8 100644
--- a/drivers/mtd/nand/raw/arasan-nand-controller.c
+++ b/drivers/mtd/nand/raw/arasan-nand-controller.c
@@ -980,10 +980,10 @@ static int anfc_init_hw_ecc_controller(struct arasan_nfc *nfc,
return -EINVAL;
}
- mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops);
+ mtd_set_ooblayout(mtd, nand_get_large_page_ooblayout());
ecc->steps = mtd->writesize / ecc->size;
- ecc->algo = NAND_ECC_BCH;
+ ecc->algo = NAND_ECC_ALGO_BCH;
anand->ecc_bits = bch_gf_mag * ecc->strength;
ecc->bytes = DIV_ROUND_UP(anand->ecc_bits, 8);
anand->ecc_total = DIV_ROUND_UP(anand->ecc_bits * ecc->steps, 8);
@@ -1056,17 +1056,17 @@ static int anfc_attach_chip(struct nand_chip *chip)
chip->ecc.read_page_raw = nand_monolithic_read_page_raw;
chip->ecc.write_page_raw = nand_monolithic_write_page_raw;
- switch (chip->ecc.mode) {
- case NAND_ECC_NONE:
- case NAND_ECC_SOFT:
- case NAND_ECC_ON_DIE:
+ switch (chip->ecc.engine_type) {
+ case NAND_ECC_ENGINE_TYPE_NONE:
+ case NAND_ECC_ENGINE_TYPE_SOFT:
+ case NAND_ECC_ENGINE_TYPE_ON_DIE:
break;
- case NAND_ECC_HW:
+ case NAND_ECC_ENGINE_TYPE_ON_HOST:
ret = anfc_init_hw_ecc_controller(nfc, chip);
break;
default:
dev_err(nfc->dev, "Unsupported ECC mode: %d\n",
- chip->ecc.mode);
+ chip->ecc.engine_type);
return -EINVAL;
}
diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c
index c9818f548d07..e6ceec8f50dc 100644
--- a/drivers/mtd/nand/raw/atmel/nand-controller.c
+++ b/drivers/mtd/nand/raw/atmel/nand-controller.c
@@ -202,6 +202,8 @@ struct atmel_nand_controller_ops {
int (*ecc_init)(struct nand_chip *chip);
int (*setup_interface)(struct atmel_nand *nand, int csline,
const struct nand_interface_config *conf);
+ int (*exec_op)(struct atmel_nand *nand,
+ const struct nand_operation *op, bool check_only);
};
struct atmel_nand_controller_caps {
@@ -259,6 +261,7 @@ struct atmel_hsmc_nand_controller {
struct regmap *io;
struct atmel_nfc_op op;
struct completion complete;
+ u32 cfg;
int irq;
/* Only used when instantiating from legacy DT bindings. */
@@ -414,29 +417,62 @@ err:
return -EIO;
}
-static u8 atmel_nand_read_byte(struct nand_chip *chip)
+static int atmel_nfc_exec_op(struct atmel_hsmc_nand_controller *nc, bool poll)
{
- struct atmel_nand *nand = to_atmel_nand(chip);
+ u8 *addrs = nc->op.addrs;
+ unsigned int op = 0;
+ u32 addr, val;
+ int i, ret;
- return ioread8(nand->activecs->io.virt);
-}
+ nc->op.wait = ATMEL_HSMC_NFC_SR_CMDDONE;
-static void atmel_nand_write_byte(struct nand_chip *chip, u8 byte)
-{
- struct atmel_nand *nand = to_atmel_nand(chip);
+ for (i = 0; i < nc->op.ncmds; i++)
+ op |= ATMEL_NFC_CMD(i, nc->op.cmds[i]);
- if (chip->options & NAND_BUSWIDTH_16)
- iowrite16(byte | (byte << 8), nand->activecs->io.virt);
- else
- iowrite8(byte, nand->activecs->io.virt);
+ if (nc->op.naddrs == ATMEL_NFC_MAX_ADDR_CYCLES)
+ regmap_write(nc->base.smc, ATMEL_HSMC_NFC_ADDR, *addrs++);
+
+ op |= ATMEL_NFC_CSID(nc->op.cs) |
+ ATMEL_NFC_ACYCLE(nc->op.naddrs);
+
+ if (nc->op.ncmds > 1)
+ op |= ATMEL_NFC_VCMD2;
+
+ addr = addrs[0] | (addrs[1] << 8) | (addrs[2] << 16) |
+ (addrs[3] << 24);
+
+ if (nc->op.data != ATMEL_NFC_NO_DATA) {
+ op |= ATMEL_NFC_DATAEN;
+ nc->op.wait |= ATMEL_HSMC_NFC_SR_XFRDONE;
+
+ if (nc->op.data == ATMEL_NFC_WRITE_DATA)
+ op |= ATMEL_NFC_NFCWR;
+ }
+
+ /* Clear all flags. */
+ regmap_read(nc->base.smc, ATMEL_HSMC_NFC_SR, &val);
+
+ /* Send the command. */
+ regmap_write(nc->io, op, addr);
+
+ ret = atmel_nfc_wait(nc, poll, 0);
+ if (ret)
+ dev_err(nc->base.dev,
+ "Failed to send NAND command (err = %d)!",
+ ret);
+
+ /* Reset the op state. */
+ memset(&nc->op, 0, sizeof(nc->op));
+
+ return ret;
}
-static void atmel_nand_read_buf(struct nand_chip *chip, u8 *buf, int len)
+static void atmel_nand_data_in(struct atmel_nand *nand, void *buf,
+ unsigned int len, bool force_8bit)
{
- struct atmel_nand *nand = to_atmel_nand(chip);
struct atmel_nand_controller *nc;
- nc = to_nand_controller(chip->controller);
+ nc = to_nand_controller(nand->base.controller);
/*
* If the controller supports DMA, the buffer address is DMA-able and
@@ -444,23 +480,23 @@ static void atmel_nand_read_buf(struct nand_chip *chip, u8 *buf, int len)
* a DMA transfer. If it fails, fallback to PIO mode.
*/
if (nc->dmac && virt_addr_valid(buf) &&
- len >= MIN_DMA_LEN &&
+ len >= MIN_DMA_LEN && !force_8bit &&
!atmel_nand_dma_transfer(nc, buf, nand->activecs->io.dma, len,
DMA_FROM_DEVICE))
return;
- if (chip->options & NAND_BUSWIDTH_16)
+ if ((nand->base.options & NAND_BUSWIDTH_16) && !force_8bit)
ioread16_rep(nand->activecs->io.virt, buf, len / 2);
else
ioread8_rep(nand->activecs->io.virt, buf, len);
}
-static void atmel_nand_write_buf(struct nand_chip *chip, const u8 *buf, int len)
+static void atmel_nand_data_out(struct atmel_nand *nand, const void *buf,
+ unsigned int len, bool force_8bit)
{
- struct atmel_nand *nand = to_atmel_nand(chip);
struct atmel_nand_controller *nc;
- nc = to_nand_controller(chip->controller);
+ nc = to_nand_controller(nand->base.controller);
/*
* If the controller supports DMA, the buffer address is DMA-able and
@@ -468,179 +504,213 @@ static void atmel_nand_write_buf(struct nand_chip *chip, const u8 *buf, int len)
* a DMA transfer. If it fails, fallback to PIO mode.
*/
if (nc->dmac && virt_addr_valid(buf) &&
- len >= MIN_DMA_LEN &&
+ len >= MIN_DMA_LEN && !force_8bit &&
!atmel_nand_dma_transfer(nc, (void *)buf, nand->activecs->io.dma,
len, DMA_TO_DEVICE))
return;
- if (chip->options & NAND_BUSWIDTH_16)
+ if ((nand->base.options & NAND_BUSWIDTH_16) && !force_8bit)
iowrite16_rep(nand->activecs->io.virt, buf, len / 2);
else
iowrite8_rep(nand->activecs->io.virt, buf, len);
}
-static int atmel_nand_dev_ready(struct nand_chip *chip)
+static int atmel_nand_waitrdy(struct atmel_nand *nand, unsigned int timeout_ms)
{
- struct atmel_nand *nand = to_atmel_nand(chip);
+ if (nand->activecs->rb.type == ATMEL_NAND_NO_RB)
+ return nand_soft_waitrdy(&nand->base, timeout_ms);
- return gpiod_get_value(nand->activecs->rb.gpio);
+ return nand_gpio_waitrdy(&nand->base, nand->activecs->rb.gpio,
+ timeout_ms);
}
-static void atmel_nand_select_chip(struct nand_chip *chip, int cs)
+static int atmel_hsmc_nand_waitrdy(struct atmel_nand *nand,
+ unsigned int timeout_ms)
{
- struct atmel_nand *nand = to_atmel_nand(chip);
-
- if (cs < 0 || cs >= nand->numcs) {
- nand->activecs = NULL;
- chip->legacy.dev_ready = NULL;
- return;
- }
+ struct atmel_hsmc_nand_controller *nc;
+ u32 status, mask;
- nand->activecs = &nand->cs[cs];
+ if (nand->activecs->rb.type != ATMEL_NAND_NATIVE_RB)
+ return atmel_nand_waitrdy(nand, timeout_ms);
- if (nand->activecs->rb.type == ATMEL_NAND_GPIO_RB)
- chip->legacy.dev_ready = atmel_nand_dev_ready;
+ nc = to_hsmc_nand_controller(nand->base.controller);
+ mask = ATMEL_HSMC_NFC_SR_RBEDGE(nand->activecs->rb.id);
+ return regmap_read_poll_timeout_atomic(nc->base.smc, ATMEL_HSMC_NFC_SR,
+ status, status & mask,
+ 10, timeout_ms * 1000);
}
-static int atmel_hsmc_nand_dev_ready(struct nand_chip *chip)
+static void atmel_nand_select_target(struct atmel_nand *nand,
+ unsigned int cs)
{
- struct atmel_nand *nand = to_atmel_nand(chip);
- struct atmel_hsmc_nand_controller *nc;
- u32 status;
-
- nc = to_hsmc_nand_controller(chip->controller);
-
- regmap_read(nc->base.smc, ATMEL_HSMC_NFC_SR, &status);
-
- return status & ATMEL_HSMC_NFC_SR_RBEDGE(nand->activecs->rb.id);
+ nand->activecs = &nand->cs[cs];
}
-static void atmel_hsmc_nand_select_chip(struct nand_chip *chip, int cs)
+static void atmel_hsmc_nand_select_target(struct atmel_nand *nand,
+ unsigned int cs)
{
- struct mtd_info *mtd = nand_to_mtd(chip);
- struct atmel_nand *nand = to_atmel_nand(chip);
+ struct mtd_info *mtd = nand_to_mtd(&nand->base);
struct atmel_hsmc_nand_controller *nc;
+ u32 cfg = ATMEL_HSMC_NFC_CFG_PAGESIZE(mtd->writesize) |
+ ATMEL_HSMC_NFC_CFG_SPARESIZE(mtd->oobsize) |
+ ATMEL_HSMC_NFC_CFG_RSPARE;
- nc = to_hsmc_nand_controller(chip->controller);
-
- atmel_nand_select_chip(chip, cs);
-
- if (!nand->activecs) {
- regmap_write(nc->base.smc, ATMEL_HSMC_NFC_CTRL,
- ATMEL_HSMC_NFC_CTRL_DIS);
+ nand->activecs = &nand->cs[cs];
+ nc = to_hsmc_nand_controller(nand->base.controller);
+ if (nc->cfg == cfg)
return;
- }
-
- if (nand->activecs->rb.type == ATMEL_NAND_NATIVE_RB)
- chip->legacy.dev_ready = atmel_hsmc_nand_dev_ready;
regmap_update_bits(nc->base.smc, ATMEL_HSMC_NFC_CFG,
ATMEL_HSMC_NFC_CFG_PAGESIZE_MASK |
ATMEL_HSMC_NFC_CFG_SPARESIZE_MASK |
ATMEL_HSMC_NFC_CFG_RSPARE |
ATMEL_HSMC_NFC_CFG_WSPARE,
- ATMEL_HSMC_NFC_CFG_PAGESIZE(mtd->writesize) |
- ATMEL_HSMC_NFC_CFG_SPARESIZE(mtd->oobsize) |
- ATMEL_HSMC_NFC_CFG_RSPARE);
- regmap_write(nc->base.smc, ATMEL_HSMC_NFC_CTRL,
- ATMEL_HSMC_NFC_CTRL_EN);
+ cfg);
+ nc->cfg = cfg;
}
-static int atmel_nfc_exec_op(struct atmel_hsmc_nand_controller *nc, bool poll)
+static int atmel_smc_nand_exec_instr(struct atmel_nand *nand,
+ const struct nand_op_instr *instr)
{
- u8 *addrs = nc->op.addrs;
- unsigned int op = 0;
- u32 addr, val;
- int i, ret;
-
- nc->op.wait = ATMEL_HSMC_NFC_SR_CMDDONE;
-
- for (i = 0; i < nc->op.ncmds; i++)
- op |= ATMEL_NFC_CMD(i, nc->op.cmds[i]);
-
- if (nc->op.naddrs == ATMEL_NFC_MAX_ADDR_CYCLES)
- regmap_write(nc->base.smc, ATMEL_HSMC_NFC_ADDR, *addrs++);
-
- op |= ATMEL_NFC_CSID(nc->op.cs) |
- ATMEL_NFC_ACYCLE(nc->op.naddrs);
-
- if (nc->op.ncmds > 1)
- op |= ATMEL_NFC_VCMD2;
-
- addr = addrs[0] | (addrs[1] << 8) | (addrs[2] << 16) |
- (addrs[3] << 24);
-
- if (nc->op.data != ATMEL_NFC_NO_DATA) {
- op |= ATMEL_NFC_DATAEN;
- nc->op.wait |= ATMEL_HSMC_NFC_SR_XFRDONE;
+ struct atmel_nand_controller *nc;
+ unsigned int i;
- if (nc->op.data == ATMEL_NFC_WRITE_DATA)
- op |= ATMEL_NFC_NFCWR;
+ nc = to_nand_controller(nand->base.controller);
+ switch (instr->type) {
+ case NAND_OP_CMD_INSTR:
+ writeb(instr->ctx.cmd.opcode,
+ nand->activecs->io.virt + nc->caps->cle_offs);
+ return 0;
+ case NAND_OP_ADDR_INSTR:
+ for (i = 0; i < instr->ctx.addr.naddrs; i++)
+ writeb(instr->ctx.addr.addrs[i],
+ nand->activecs->io.virt + nc->caps->ale_offs);
+ return 0;
+ case NAND_OP_DATA_IN_INSTR:
+ atmel_nand_data_in(nand, instr->ctx.data.buf.in,
+ instr->ctx.data.len,
+ instr->ctx.data.force_8bit);
+ return 0;
+ case NAND_OP_DATA_OUT_INSTR:
+ atmel_nand_data_out(nand, instr->ctx.data.buf.out,
+ instr->ctx.data.len,
+ instr->ctx.data.force_8bit);
+ return 0;
+ case NAND_OP_WAITRDY_INSTR:
+ return atmel_nand_waitrdy(nand,
+ instr->ctx.waitrdy.timeout_ms);
+ default:
+ break;
}
- /* Clear all flags. */
- regmap_read(nc->base.smc, ATMEL_HSMC_NFC_SR, &val);
+ return -EINVAL;
+}
- /* Send the command. */
- regmap_write(nc->io, op, addr);
+static int atmel_smc_nand_exec_op(struct atmel_nand *nand,
+ const struct nand_operation *op,
+ bool check_only)
+{
+ unsigned int i;
+ int ret = 0;
- ret = atmel_nfc_wait(nc, poll, 0);
- if (ret)
- dev_err(nc->base.dev,
- "Failed to send NAND command (err = %d)!",
- ret);
+ if (check_only)
+ return 0;
- /* Reset the op state. */
- memset(&nc->op, 0, sizeof(nc->op));
+ atmel_nand_select_target(nand, op->cs);
+ gpiod_set_value(nand->activecs->csgpio, 0);
+ for (i = 0; i < op->ninstrs; i++) {
+ ret = atmel_smc_nand_exec_instr(nand, &op->instrs[i]);
+ if (ret)
+ break;
+ }
+ gpiod_set_value(nand->activecs->csgpio, 1);
return ret;
}
-static void atmel_hsmc_nand_cmd_ctrl(struct nand_chip *chip, int dat,
- unsigned int ctrl)
+static int atmel_hsmc_exec_cmd_addr(struct nand_chip *chip,
+ const struct nand_subop *subop)
{
struct atmel_nand *nand = to_atmel_nand(chip);
struct atmel_hsmc_nand_controller *nc;
+ unsigned int i, j;
nc = to_hsmc_nand_controller(chip->controller);
- if (ctrl & NAND_ALE) {
- if (nc->op.naddrs == ATMEL_NFC_MAX_ADDR_CYCLES)
- return;
+ nc->op.cs = nand->activecs->id;
+ for (i = 0; i < subop->ninstrs; i++) {
+ const struct nand_op_instr *instr = &subop->instrs[i];
- nc->op.addrs[nc->op.naddrs++] = dat;
- } else if (ctrl & NAND_CLE) {
- if (nc->op.ncmds > 1)
- return;
+ if (instr->type == NAND_OP_CMD_INSTR) {
+ nc->op.cmds[nc->op.ncmds++] = instr->ctx.cmd.opcode;
+ continue;
+ }
- nc->op.cmds[nc->op.ncmds++] = dat;
+ for (j = nand_subop_get_addr_start_off(subop, i);
+ j < nand_subop_get_num_addr_cyc(subop, i); j++) {
+ nc->op.addrs[nc->op.naddrs] = instr->ctx.addr.addrs[j];
+ nc->op.naddrs++;
+ }
}
- if (dat == NAND_CMD_NONE) {
- nc->op.cs = nand->activecs->id;
- atmel_nfc_exec_op(nc, true);
- }
+ return atmel_nfc_exec_op(nc, true);
}
-static void atmel_nand_cmd_ctrl(struct nand_chip *chip, int cmd,
- unsigned int ctrl)
+static int atmel_hsmc_exec_rw(struct nand_chip *chip,
+ const struct nand_subop *subop)
{
+ const struct nand_op_instr *instr = subop->instrs;
struct atmel_nand *nand = to_atmel_nand(chip);
- struct atmel_nand_controller *nc;
- nc = to_nand_controller(chip->controller);
+ if (instr->type == NAND_OP_DATA_IN_INSTR)
+ atmel_nand_data_in(nand, instr->ctx.data.buf.in,
+ instr->ctx.data.len,
+ instr->ctx.data.force_8bit);
+ else
+ atmel_nand_data_out(nand, instr->ctx.data.buf.out,
+ instr->ctx.data.len,
+ instr->ctx.data.force_8bit);
- if ((ctrl & NAND_CTRL_CHANGE) && nand->activecs->csgpio) {
- if (ctrl & NAND_NCE)
- gpiod_set_value(nand->activecs->csgpio, 0);
- else
- gpiod_set_value(nand->activecs->csgpio, 1);
- }
+ return 0;
+}
+
+static int atmel_hsmc_exec_waitrdy(struct nand_chip *chip,
+ const struct nand_subop *subop)
+{
+ const struct nand_op_instr *instr = subop->instrs;
+ struct atmel_nand *nand = to_atmel_nand(chip);
- if (ctrl & NAND_ALE)
- writeb(cmd, nand->activecs->io.virt + nc->caps->ale_offs);
- else if (ctrl & NAND_CLE)
- writeb(cmd, nand->activecs->io.virt + nc->caps->cle_offs);
+ return atmel_hsmc_nand_waitrdy(nand, instr->ctx.waitrdy.timeout_ms);
+}
+
+static const struct nand_op_parser atmel_hsmc_op_parser = NAND_OP_PARSER(
+ NAND_OP_PARSER_PATTERN(atmel_hsmc_exec_cmd_addr,
+ NAND_OP_PARSER_PAT_CMD_ELEM(true),
+ NAND_OP_PARSER_PAT_ADDR_ELEM(true, 5),
+ NAND_OP_PARSER_PAT_CMD_ELEM(true)),
+ NAND_OP_PARSER_PATTERN(atmel_hsmc_exec_rw,
+ NAND_OP_PARSER_PAT_DATA_IN_ELEM(false, 0)),
+ NAND_OP_PARSER_PATTERN(atmel_hsmc_exec_rw,
+ NAND_OP_PARSER_PAT_DATA_OUT_ELEM(false, 0)),
+ NAND_OP_PARSER_PATTERN(atmel_hsmc_exec_waitrdy,
+ NAND_OP_PARSER_PAT_WAITRDY_ELEM(false)),
+);
+
+static int atmel_hsmc_nand_exec_op(struct atmel_nand *nand,
+ const struct nand_operation *op,
+ bool check_only)
+{
+ int ret;
+
+ if (check_only)
+ return nand_op_parser_exec_op(&nand->base,
+ &atmel_hsmc_op_parser, op, true);
+
+ atmel_hsmc_nand_select_target(nand, op->cs);
+ ret = nand_op_parser_exec_op(&nand->base, &atmel_hsmc_op_parser, op,
+ false);
+
+ return ret;
}
static void atmel_nfc_copy_to_sram(struct nand_chip *chip, const u8 *buf,
@@ -838,7 +908,7 @@ static int atmel_nand_pmecc_write_pg(struct nand_chip *chip, const u8 *buf,
if (ret)
return ret;
- atmel_nand_write_buf(chip, buf, mtd->writesize);
+ nand_write_data_op(chip, buf, mtd->writesize, false);
ret = atmel_nand_pmecc_generate_eccbytes(chip, raw);
if (ret) {
@@ -848,7 +918,7 @@ static int atmel_nand_pmecc_write_pg(struct nand_chip *chip, const u8 *buf,
atmel_nand_pmecc_disable(chip, raw);
- atmel_nand_write_buf(chip, chip->oob_poi, mtd->oobsize);
+ nand_write_data_op(chip, chip->oob_poi, mtd->oobsize, false);
return nand_prog_page_end_op(chip);
}
@@ -878,11 +948,17 @@ static int atmel_nand_pmecc_read_pg(struct nand_chip *chip, u8 *buf,
if (ret)
return ret;
- atmel_nand_read_buf(chip, buf, mtd->writesize);
- atmel_nand_read_buf(chip, chip->oob_poi, mtd->oobsize);
+ ret = nand_read_data_op(chip, buf, mtd->writesize, false, false);
+ if (ret)
+ goto out_disable;
+
+ ret = nand_read_data_op(chip, chip->oob_poi, mtd->oobsize, false, false);
+ if (ret)
+ goto out_disable;
ret = atmel_nand_pmecc_correct_data(chip, buf, raw);
+out_disable:
atmel_nand_pmecc_disable(chip, raw);
return ret;
@@ -907,8 +983,9 @@ static int atmel_hsmc_nand_pmecc_write_pg(struct nand_chip *chip,
struct mtd_info *mtd = nand_to_mtd(chip);
struct atmel_nand *nand = to_atmel_nand(chip);
struct atmel_hsmc_nand_controller *nc;
- int ret, status;
+ int ret;
+ atmel_hsmc_nand_select_target(nand, chip->cur_cs);
nc = to_hsmc_nand_controller(chip->controller);
atmel_nfc_copy_to_sram(chip, buf, false);
@@ -939,21 +1016,9 @@ static int atmel_hsmc_nand_pmecc_write_pg(struct nand_chip *chip,
if (ret)
return ret;
- atmel_nand_write_buf(chip, chip->oob_poi, mtd->oobsize);
-
- nc->op.cmds[0] = NAND_CMD_PAGEPROG;
- nc->op.ncmds = 1;
- nc->op.cs = nand->activecs->id;
- ret = atmel_nfc_exec_op(nc, false);
- if (ret)
- dev_err(nc->base.dev, "Failed to program NAND page (err = %d)\n",
- ret);
-
- status = chip->legacy.waitfunc(chip);
- if (status & NAND_STATUS_FAIL)
- return -EIO;
+ nand_write_data_op(chip, chip->oob_poi, mtd->oobsize, false);
- return ret;
+ return nand_prog_page_end_op(chip);
}
static int atmel_hsmc_nand_pmecc_write_page(struct nand_chip *chip,
@@ -981,6 +1046,7 @@ static int atmel_hsmc_nand_pmecc_read_pg(struct nand_chip *chip, u8 *buf,
struct atmel_hsmc_nand_controller *nc;
int ret;
+ atmel_hsmc_nand_select_target(nand, chip->cur_cs);
nc = to_hsmc_nand_controller(chip->controller);
/*
@@ -988,12 +1054,9 @@ static int atmel_hsmc_nand_pmecc_read_pg(struct nand_chip *chip, u8 *buf,
* connected to a native SoC R/B pin. If that's not the case, fallback
* to the non-optimized one.
*/
- if (nand->activecs->rb.type != ATMEL_NAND_NATIVE_RB) {
- nand_read_page_op(chip, page, 0, NULL, 0);
-
+ if (nand->activecs->rb.type != ATMEL_NAND_NATIVE_RB)
return atmel_nand_pmecc_read_pg(chip, buf, oob_required, page,
raw);
- }
nc->op.cmds[nc->op.ncmds++] = NAND_CMD_READ0;
@@ -1043,7 +1106,10 @@ static int atmel_hsmc_nand_pmecc_read_page_raw(struct nand_chip *chip,
static int atmel_nand_pmecc_init(struct nand_chip *chip)
{
+ const struct nand_ecc_props *requirements =
+ nanddev_get_ecc_requirements(&chip->base);
struct mtd_info *mtd = nand_to_mtd(chip);
+ struct nand_device *nanddev = mtd_to_nanddev(mtd);
struct atmel_nand *nand = to_atmel_nand(chip);
struct atmel_nand_controller *nc;
struct atmel_pmecc_user_req req;
@@ -1068,19 +1134,19 @@ static int atmel_nand_pmecc_init(struct nand_chip *chip)
chip->ecc.size = val;
}
- if (chip->ecc.options & NAND_ECC_MAXIMIZE)
+ if (nanddev->ecc.user_conf.flags & NAND_ECC_MAXIMIZE_STRENGTH)
req.ecc.strength = ATMEL_PMECC_MAXIMIZE_ECC_STRENGTH;
else if (chip->ecc.strength)
req.ecc.strength = chip->ecc.strength;
- else if (chip->base.eccreq.strength)
- req.ecc.strength = chip->base.eccreq.strength;
+ else if (requirements->strength)
+ req.ecc.strength = requirements->strength;
else
req.ecc.strength = ATMEL_PMECC_MAXIMIZE_ECC_STRENGTH;
if (chip->ecc.size)
req.ecc.sectorsize = chip->ecc.size;
- else if (chip->base.eccreq.step_size)
- req.ecc.sectorsize = chip->base.eccreq.step_size;
+ else if (requirements->step_size)
+ req.ecc.sectorsize = requirements->step_size;
else
req.ecc.sectorsize = ATMEL_PMECC_SECTOR_SIZE_AUTO;
@@ -1099,14 +1165,14 @@ static int atmel_nand_pmecc_init(struct nand_chip *chip)
if (IS_ERR(nand->pmecc))
return PTR_ERR(nand->pmecc);
- chip->ecc.algo = NAND_ECC_BCH;
+ chip->ecc.algo = NAND_ECC_ALGO_BCH;
chip->ecc.size = req.ecc.sectorsize;
chip->ecc.bytes = req.ecc.bytes / req.ecc.nsectors;
chip->ecc.strength = req.ecc.strength;
chip->options |= NAND_NO_SUBPAGE_WRITE;
- mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops);
+ mtd_set_ooblayout(mtd, nand_get_large_page_ooblayout());
return 0;
}
@@ -1118,15 +1184,15 @@ static int atmel_nand_ecc_init(struct nand_chip *chip)
nc = to_nand_controller(chip->controller);
- switch (chip->ecc.mode) {
- case NAND_ECC_NONE:
- case NAND_ECC_SOFT:
+ switch (chip->ecc.engine_type) {
+ case NAND_ECC_ENGINE_TYPE_NONE:
+ case NAND_ECC_ENGINE_TYPE_SOFT:
/*
* Nothing to do, the core will initialize everything for us.
*/
break;
- case NAND_ECC_HW:
+ case NAND_ECC_ENGINE_TYPE_ON_HOST:
ret = atmel_nand_pmecc_init(chip);
if (ret)
return ret;
@@ -1140,7 +1206,7 @@ static int atmel_nand_ecc_init(struct nand_chip *chip)
default:
/* Other modes are not supported. */
dev_err(nc->dev, "Unsupported ECC mode: %d\n",
- chip->ecc.mode);
+ chip->ecc.engine_type);
return -ENOTSUPP;
}
@@ -1155,7 +1221,7 @@ static int atmel_hsmc_nand_ecc_init(struct nand_chip *chip)
if (ret)
return ret;
- if (chip->ecc.mode != NAND_ECC_HW)
+ if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST)
return 0;
/* Adjust the ECC operations for the HSMC IP. */
@@ -1467,6 +1533,18 @@ static int atmel_nand_setup_interface(struct nand_chip *chip, int csline,
return nc->caps->ops->setup_interface(nand, csline, conf);
}
+static int atmel_nand_exec_op(struct nand_chip *chip,
+ const struct nand_operation *op,
+ bool check_only)
+{
+ struct atmel_nand *nand = to_atmel_nand(chip);
+ struct atmel_nand_controller *nc;
+
+ nc = to_nand_controller(nand->base.controller);
+
+ return nc->caps->ops->exec_op(nand, op, check_only);
+}
+
static void atmel_nand_init(struct atmel_nand_controller *nc,
struct atmel_nand *nand)
{
@@ -1476,19 +1554,9 @@ static void atmel_nand_init(struct atmel_nand_controller *nc,
mtd->dev.parent = nc->dev;
nand->base.controller = &nc->base;
- chip->legacy.cmd_ctrl = atmel_nand_cmd_ctrl;
- chip->legacy.read_byte = atmel_nand_read_byte;
- chip->legacy.write_byte = atmel_nand_write_byte;
- chip->legacy.read_buf = atmel_nand_read_buf;
- chip->legacy.write_buf = atmel_nand_write_buf;
- chip->legacy.select_chip = atmel_nand_select_chip;
-
if (!nc->mck || !nc->caps->ops->setup_interface)
chip->options |= NAND_KEEP_TIMINGS;
- /* Some NANDs require a longer delay than the default one (20us). */
- chip->legacy.chip_delay = 40;
-
/*
* Use a bounce buffer when the buffer passed by the MTD user is not
* suitable for DMA.
@@ -1498,7 +1566,7 @@ static void atmel_nand_init(struct atmel_nand_controller *nc,
/* Default to HW ECC if pmecc is available. */
if (nc->pmecc)
- chip->ecc.mode = NAND_ECC_HW;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
}
static void atmel_smc_nand_init(struct atmel_nand_controller *nc,
@@ -1527,18 +1595,6 @@ static void atmel_smc_nand_init(struct atmel_nand_controller *nc,
smc_nc->ebi_csa->nfd0_on_d16);
}
-static void atmel_hsmc_nand_init(struct atmel_nand_controller *nc,
- struct atmel_nand *nand)
-{
- struct nand_chip *chip = &nand->base;
-
- atmel_nand_init(nc, nand);
-
- /* Overload some methods for the HSMC controller. */
- chip->legacy.cmd_ctrl = atmel_hsmc_nand_cmd_ctrl;
- chip->legacy.select_chip = atmel_hsmc_nand_select_chip;
-}
-
static int atmel_nand_controller_remove_nand(struct atmel_nand *nand)
{
struct nand_chip *chip = &nand->base;
@@ -1957,6 +2013,7 @@ static int atmel_nand_attach_chip(struct nand_chip *chip)
static const struct nand_controller_ops atmel_nand_controller_ops = {
.attach_chip = atmel_nand_attach_chip,
.setup_interface = atmel_nand_setup_interface,
+ .exec_op = atmel_nand_exec_op,
};
static int atmel_nand_controller_init(struct atmel_nand_controller *nc,
@@ -1976,13 +2033,9 @@ static int atmel_nand_controller_init(struct atmel_nand_controller *nc,
platform_set_drvdata(pdev, nc);
nc->pmecc = devm_atmel_pmecc_get(dev);
- if (IS_ERR(nc->pmecc)) {
- ret = PTR_ERR(nc->pmecc);
- if (ret != -EPROBE_DEFER)
- dev_err(dev, "Could not get PMECC object (err = %d)\n",
- ret);
- return ret;
- }
+ if (IS_ERR(nc->pmecc))
+ return dev_err_probe(dev, PTR_ERR(nc->pmecc),
+ "Could not get PMECC object\n");
if (nc->caps->has_dma && !atmel_nand_avoid_dma) {
dma_cap_mask_t mask;
@@ -2248,6 +2301,9 @@ atmel_hsmc_nand_controller_remove(struct atmel_nand_controller *nc)
return ret;
hsmc_nc = container_of(nc, struct atmel_hsmc_nand_controller, base);
+ regmap_write(hsmc_nc->base.smc, ATMEL_HSMC_NFC_CTRL,
+ ATMEL_HSMC_NFC_CTRL_DIS);
+
if (hsmc_nc->sram.pool)
gen_pool_free(hsmc_nc->sram.pool,
(unsigned long)hsmc_nc->sram.virt,
@@ -2300,6 +2356,8 @@ static int atmel_hsmc_nand_controller_probe(struct platform_device *pdev,
/* Initial NFC configuration. */
regmap_write(nc->base.smc, ATMEL_HSMC_NFC_CFG,
ATMEL_HSMC_NFC_CFG_DTO_MAX);
+ regmap_write(nc->base.smc, ATMEL_HSMC_NFC_CTRL,
+ ATMEL_HSMC_NFC_CTRL_EN);
ret = atmel_nand_controller_add_nands(&nc->base);
if (ret)
@@ -2317,8 +2375,9 @@ static const struct atmel_nand_controller_ops atmel_hsmc_nc_ops = {
.probe = atmel_hsmc_nand_controller_probe,
.remove = atmel_hsmc_nand_controller_remove,
.ecc_init = atmel_hsmc_nand_ecc_init,
- .nand_init = atmel_hsmc_nand_init,
+ .nand_init = atmel_nand_init,
.setup_interface = atmel_hsmc_nand_setup_interface,
+ .exec_op = atmel_hsmc_nand_exec_op,
};
static const struct atmel_nand_controller_caps atmel_sama5_nc_caps = {
@@ -2385,6 +2444,7 @@ static const struct atmel_nand_controller_ops at91rm9200_nc_ops = {
.remove = atmel_smc_nand_controller_remove,
.ecc_init = atmel_nand_ecc_init,
.nand_init = atmel_smc_nand_init,
+ .exec_op = atmel_smc_nand_exec_op,
};
static const struct atmel_nand_controller_caps atmel_rm9200_nc_caps = {
@@ -2400,6 +2460,7 @@ static const struct atmel_nand_controller_ops atmel_smc_nc_ops = {
.ecc_init = atmel_nand_ecc_init,
.nand_init = atmel_smc_nand_init,
.setup_interface = atmel_smc_nand_setup_interface,
+ .exec_op = atmel_smc_nand_exec_op,
};
static const struct atmel_nand_controller_caps atmel_sam9260_nc_caps = {
diff --git a/drivers/mtd/nand/raw/au1550nd.c b/drivers/mtd/nand/raw/au1550nd.c
index d865200ccd08..79b057400fe9 100644
--- a/drivers/mtd/nand/raw/au1550nd.c
+++ b/drivers/mtd/nand/raw/au1550nd.c
@@ -294,8 +294,8 @@ static int au1550nd_probe(struct platform_device *pdev)
nand_controller_init(&ctx->controller);
ctx->controller.ops = &au1550nd_ops;
this->controller = &ctx->controller;
- this->ecc.mode = NAND_ECC_SOFT;
- this->ecc.algo = NAND_ECC_HAMMING;
+ this->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+ this->ecc.algo = NAND_ECC_ALGO_HAMMING;
if (pd->devwidth)
this->options |= NAND_BUSWIDTH_16;
diff --git a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
index 591775173034..8bb17c5a66c3 100644
--- a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
+++ b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
@@ -391,7 +391,8 @@ int bcm47xxnflash_ops_bcm4706_init(struct bcm47xxnflash *b47n)
nand_chip->legacy.chip_delay = 50;
b47n->nand_chip.bbt_options = NAND_BBT_USE_FLASH;
- b47n->nand_chip.ecc.mode = NAND_ECC_NONE; /* TODO: implement ECC */
+ /* TODO: implement ECC */
+ b47n->nand_chip.ecc.engine_type = NAND_ECC_ENGINE_TYPE_NONE;
/* Enable NAND flash access */
bcma_cc_set32(b47n->cc, BCMA_CC_4706_FLASHSCFG,
diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
index a4033d32a710..2da39ab89286 100644
--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
@@ -2532,6 +2532,8 @@ static int brcmnand_setup_dev(struct brcmnand_host *host)
{
struct mtd_info *mtd = nand_to_mtd(&host->chip);
struct nand_chip *chip = &host->chip;
+ const struct nand_ecc_props *requirements =
+ nanddev_get_ecc_requirements(&chip->base);
struct brcmnand_controller *ctrl = host->ctrl;
struct brcmnand_cfg *cfg = &host->hwcfg;
char msg[128];
@@ -2565,34 +2567,34 @@ static int brcmnand_setup_dev(struct brcmnand_host *host)
cfg->col_adr_bytes = 2;
cfg->blk_adr_bytes = get_blk_adr_bytes(mtd->size, mtd->writesize);
- if (chip->ecc.mode != NAND_ECC_HW) {
+ if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST) {
dev_err(ctrl->dev, "only HW ECC supported; selected: %d\n",
- chip->ecc.mode);
+ chip->ecc.engine_type);
return -EINVAL;
}
- if (chip->ecc.algo == NAND_ECC_UNKNOWN) {
+ if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) {
if (chip->ecc.strength == 1 && chip->ecc.size == 512)
/* Default to Hamming for 1-bit ECC, if unspecified */
- chip->ecc.algo = NAND_ECC_HAMMING;
+ chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
else
/* Otherwise, BCH */
- chip->ecc.algo = NAND_ECC_BCH;
+ chip->ecc.algo = NAND_ECC_ALGO_BCH;
}
- if (chip->ecc.algo == NAND_ECC_HAMMING && (chip->ecc.strength != 1 ||
- chip->ecc.size != 512)) {
+ if (chip->ecc.algo == NAND_ECC_ALGO_HAMMING &&
+ (chip->ecc.strength != 1 || chip->ecc.size != 512)) {
dev_err(ctrl->dev, "invalid Hamming params: %d bits per %d bytes\n",
chip->ecc.strength, chip->ecc.size);
return -EINVAL;
}
- if (chip->ecc.mode != NAND_ECC_NONE &&
+ if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_NONE &&
(!chip->ecc.size || !chip->ecc.strength)) {
- if (chip->base.eccreq.step_size && chip->base.eccreq.strength) {
+ if (requirements->step_size && requirements->strength) {
/* use detected ECC parameters */
- chip->ecc.size = chip->base.eccreq.step_size;
- chip->ecc.strength = chip->base.eccreq.strength;
+ chip->ecc.size = requirements->step_size;
+ chip->ecc.strength = requirements->strength;
dev_info(ctrl->dev, "Using ECC step-size %d, strength %d\n",
chip->ecc.size, chip->ecc.strength);
}
@@ -2600,7 +2602,7 @@ static int brcmnand_setup_dev(struct brcmnand_host *host)
switch (chip->ecc.size) {
case 512:
- if (chip->ecc.algo == NAND_ECC_HAMMING)
+ if (chip->ecc.algo == NAND_ECC_ALGO_HAMMING)
cfg->ecc_level = 15;
else
cfg->ecc_level = chip->ecc.strength;
@@ -2728,7 +2730,7 @@ static int brcmnand_init_cs(struct brcmnand_host *host, struct device_node *dn)
chip->legacy.read_buf = brcmnand_read_buf;
chip->legacy.write_buf = brcmnand_write_buf;
- chip->ecc.mode = NAND_ECC_HW;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
chip->ecc.read_page = brcmnand_read_page;
chip->ecc.write_page = brcmnand_write_page;
chip->ecc.read_page_raw = brcmnand_read_page_raw;
diff --git a/drivers/mtd/nand/raw/cadence-nand-controller.c b/drivers/mtd/nand/raw/cadence-nand-controller.c
index 71516af85f23..b46786cd53e0 100644
--- a/drivers/mtd/nand/raw/cadence-nand-controller.c
+++ b/drivers/mtd/nand/raw/cadence-nand-controller.c
@@ -2611,7 +2611,7 @@ static int cadence_nand_attach_chip(struct nand_chip *chip)
chip->bbt_options |= NAND_BBT_USE_FLASH;
chip->bbt_options |= NAND_BBT_NO_OOB;
- chip->ecc.mode = NAND_ECC_HW;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
chip->options |= NAND_NO_SUBPAGE_WRITE;
@@ -2757,7 +2757,7 @@ static int cadence_nand_chip_init(struct cdns_nand_ctrl *cdns_ctrl,
* Default to HW ECC engine mode. If the nand-ecc-mode property is given
* in the DT node, this entry will be overwritten in nand_scan_ident().
*/
- chip->ecc.mode = NAND_ECC_HW;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
ret = nand_scan(chip, cdns_chip->nsels);
if (ret) {
@@ -2980,18 +2980,14 @@ static int cadence_nand_dt_probe(struct platform_device *ofdev)
dev_info(cdns_ctrl->dev, "IRQ: nr %d\n", cdns_ctrl->irq);
cdns_ctrl->reg = devm_platform_ioremap_resource(ofdev, 0);
- if (IS_ERR(cdns_ctrl->reg)) {
- dev_err(&ofdev->dev, "devm_ioremap_resource res 0 failed\n");
+ if (IS_ERR(cdns_ctrl->reg))
return PTR_ERR(cdns_ctrl->reg);
- }
res = platform_get_resource(ofdev, IORESOURCE_MEM, 1);
cdns_ctrl->io.dma = res->start;
cdns_ctrl->io.virt = devm_ioremap_resource(&ofdev->dev, res);
- if (IS_ERR(cdns_ctrl->io.virt)) {
- dev_err(cdns_ctrl->dev, "devm_ioremap_resource res 1 failed\n");
+ if (IS_ERR(cdns_ctrl->io.virt))
return PTR_ERR(cdns_ctrl->io.virt);
- }
dt->clk = devm_clk_get(cdns_ctrl->dev, "nf_clk");
if (IS_ERR(dt->clk))
diff --git a/drivers/mtd/nand/raw/cafe_nand.c b/drivers/mtd/nand/raw/cafe_nand.c
index 92173790f20b..2b94f385a1a8 100644
--- a/drivers/mtd/nand/raw/cafe_nand.c
+++ b/drivers/mtd/nand/raw/cafe_nand.c
@@ -629,7 +629,8 @@ static int cafe_nand_attach_chip(struct nand_chip *chip)
goto out_free_dma;
}
- cafe->nand.ecc.mode = NAND_ECC_HW_SYNDROME;
+ cafe->nand.ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
+ cafe->nand.ecc.placement = NAND_ECC_PLACEMENT_INTERLEAVED;
cafe->nand.ecc.size = mtd->writesize;
cafe->nand.ecc.bytes = 14;
cafe->nand.ecc.strength = 4;
diff --git a/drivers/mtd/nand/raw/cs553x_nand.c b/drivers/mtd/nand/raw/cs553x_nand.c
index 9472bf798ed5..b7f3f6347761 100644
--- a/drivers/mtd/nand/raw/cs553x_nand.c
+++ b/drivers/mtd/nand/raw/cs553x_nand.c
@@ -286,7 +286,7 @@ static int __init cs553x_init_one(int cs, int mmio, unsigned long adr)
goto out_mtd;
}
- this->ecc.mode = NAND_ECC_HW;
+ this->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
this->ecc.size = 256;
this->ecc.bytes = 3;
this->ecc.hwctl = cs_enable_hwecc;
diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c
index d975a62caaa5..427f320fb79b 100644
--- a/drivers/mtd/nand/raw/davinci_nand.c
+++ b/drivers/mtd/nand/raw/davinci_nand.c
@@ -168,7 +168,7 @@ static int nand_davinci_correct_1bit(struct nand_chip *chip, u_char *dat,
/*
* 4-bit hardware ECC ... context maintained over entire AEMIF
*
- * This is a syndrome engine, but we avoid NAND_ECC_HW_SYNDROME
+ * This is a syndrome engine, but we avoid NAND_ECC_PLACEMENT_INTERLEAVED
* since that forces use of a problematic "infix OOB" layout.
* Among other things, it trashes manufacturer bad block markers.
* Also, and specific to this hardware, it ECC-protects the "prepad"
@@ -530,11 +530,11 @@ static struct davinci_nand_pdata
if (!of_property_read_string(pdev->dev.of_node,
"ti,davinci-ecc-mode", &mode)) {
if (!strncmp("none", mode, 4))
- pdata->ecc_mode = NAND_ECC_NONE;
+ pdata->engine_type = NAND_ECC_ENGINE_TYPE_NONE;
if (!strncmp("soft", mode, 4))
- pdata->ecc_mode = NAND_ECC_SOFT;
+ pdata->engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
if (!strncmp("hw", mode, 2))
- pdata->ecc_mode = NAND_ECC_HW;
+ pdata->engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
}
if (!of_property_read_u32(pdev->dev.of_node,
"ti,davinci-ecc-bits", &prop))
@@ -585,21 +585,21 @@ static int davinci_nand_attach_chip(struct nand_chip *chip)
if (IS_ERR(pdata))
return PTR_ERR(pdata);
- switch (info->chip.ecc.mode) {
- case NAND_ECC_NONE:
+ switch (info->chip.ecc.engine_type) {
+ case NAND_ECC_ENGINE_TYPE_NONE:
pdata->ecc_bits = 0;
break;
- case NAND_ECC_SOFT:
+ case NAND_ECC_ENGINE_TYPE_SOFT:
pdata->ecc_bits = 0;
/*
- * This driver expects Hamming based ECC when ecc_mode is set
- * to NAND_ECC_SOFT. Force ecc.algo to NAND_ECC_HAMMING to
- * avoid adding an extra ->ecc_algo field to
- * davinci_nand_pdata.
+ * This driver expects Hamming based ECC when engine_type is set
+ * to NAND_ECC_ENGINE_TYPE_SOFT. Force ecc.algo to
+ * NAND_ECC_ALGO_HAMMING to avoid adding an extra ->ecc_algo
+ * field to davinci_nand_pdata.
*/
- info->chip.ecc.algo = NAND_ECC_HAMMING;
+ info->chip.ecc.algo = NAND_ECC_ALGO_HAMMING;
break;
- case NAND_ECC_HW:
+ case NAND_ECC_ENGINE_TYPE_ON_HOST:
if (pdata->ecc_bits == 4) {
int chunks = mtd->writesize / 512;
@@ -629,7 +629,7 @@ static int davinci_nand_attach_chip(struct nand_chip *chip)
info->chip.ecc.hwctl = nand_davinci_hwctl_4bit;
info->chip.ecc.bytes = 10;
info->chip.ecc.options = NAND_ECC_GENERIC_ERASED_CHECK;
- info->chip.ecc.algo = NAND_ECC_BCH;
+ info->chip.ecc.algo = NAND_ECC_ALGO_BCH;
/*
* Update ECC layout if needed ... for 1-bit HW ECC, the
@@ -645,7 +645,8 @@ static int davinci_nand_attach_chip(struct nand_chip *chip)
mtd_set_ooblayout(mtd,
&hwecc4_small_ooblayout_ops);
} else if (chunks == 4 || chunks == 8) {
- mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops);
+ mtd_set_ooblayout(mtd,
+ nand_get_large_page_ooblayout());
info->chip.ecc.read_page = nand_davinci_read_page_hwecc_oob_first;
} else {
return -EIO;
@@ -656,7 +657,7 @@ static int davinci_nand_attach_chip(struct nand_chip *chip)
info->chip.ecc.correct = nand_davinci_correct_1bit;
info->chip.ecc.hwctl = nand_davinci_hwctl_1bit;
info->chip.ecc.bytes = 3;
- info->chip.ecc.algo = NAND_ECC_HAMMING;
+ info->chip.ecc.algo = NAND_ECC_ALGO_HAMMING;
}
info->chip.ecc.size = 512;
info->chip.ecc.strength = pdata->ecc_bits;
@@ -850,7 +851,8 @@ static int nand_davinci_probe(struct platform_device *pdev)
info->mask_cle = pdata->mask_cle ? : MASK_CLE;
/* Use board-specific ECC config */
- info->chip.ecc.mode = pdata->ecc_mode;
+ info->chip.ecc.engine_type = pdata->engine_type;
+ info->chip.ecc.placement = pdata->ecc_placement;
spin_lock_irq(&davinci_nand_lock);
@@ -897,7 +899,7 @@ static int nand_davinci_remove(struct platform_device *pdev)
int ret;
spin_lock_irq(&davinci_nand_lock);
- if (info->chip.ecc.mode == NAND_ECC_HW_SYNDROME)
+ if (info->chip.ecc.placement == NAND_ECC_PLACEMENT_INTERLEAVED)
ecc4_busy = false;
spin_unlock_irq(&davinci_nand_lock);
diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c
index 9d99dade95ce..fa2439cb4daa 100644
--- a/drivers/mtd/nand/raw/denali.c
+++ b/drivers/mtd/nand/raw/denali.c
@@ -1237,7 +1237,8 @@ int denali_chip_init(struct denali_controller *denali,
chip->bbt_options |= NAND_BBT_USE_FLASH;
chip->bbt_options |= NAND_BBT_NO_OOB;
chip->options |= NAND_NO_SUBPAGE_WRITE;
- chip->ecc.mode = NAND_ECC_HW_SYNDROME;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
+ chip->ecc.placement = NAND_ECC_PLACEMENT_INTERLEAVED;
chip->ecc.read_page = denali_read_page;
chip->ecc.write_page = denali_write_page;
chip->ecc.read_page_raw = denali_read_page_raw;
diff --git a/drivers/mtd/nand/raw/denali_pci.c b/drivers/mtd/nand/raw/denali_pci.c
index 2f77ee55e1bf..20c085a30adc 100644
--- a/drivers/mtd/nand/raw/denali_pci.c
+++ b/drivers/mtd/nand/raw/denali_pci.c
@@ -100,7 +100,7 @@ static int denali_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
goto out_remove_denali;
}
- dchip->chip.ecc.options |= NAND_ECC_MAXIMIZE;
+ dchip->chip.base.ecc.user_conf.flags |= NAND_ECC_MAXIMIZE_STRENGTH;
dchip->nsels = nsels;
diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c
index 43721863a0d8..94432a453e5e 100644
--- a/drivers/mtd/nand/raw/diskonchip.c
+++ b/drivers/mtd/nand/raw/diskonchip.c
@@ -1456,7 +1456,8 @@ static int __init doc_probe(unsigned long physadr)
nand->ecc.calculate = doc200x_calculate_ecc;
nand->ecc.correct = doc200x_correct_data;
- nand->ecc.mode = NAND_ECC_HW_SYNDROME;
+ nand->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
+ nand->ecc.placement = NAND_ECC_PLACEMENT_INTERLEAVED;
nand->ecc.size = 512;
nand->ecc.bytes = 6;
nand->ecc.strength = 2;
diff --git a/drivers/mtd/nand/raw/fsl_elbc_nand.c b/drivers/mtd/nand/raw/fsl_elbc_nand.c
index 088692b2e27a..b2af7f81fdf8 100644
--- a/drivers/mtd/nand/raw/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_elbc_nand.c
@@ -244,7 +244,7 @@ static int fsl_elbc_run_command(struct mtd_info *mtd)
return -EIO;
}
- if (chip->ecc.mode != NAND_ECC_HW)
+ if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST)
return 0;
elbc_fcm_ctrl->max_bitflips = 0;
@@ -727,12 +727,12 @@ static int fsl_elbc_attach_chip(struct nand_chip *chip)
struct fsl_lbc_regs __iomem *lbc = ctrl->regs;
unsigned int al;
- switch (chip->ecc.mode) {
+ switch (chip->ecc.engine_type) {
/*
* if ECC was not chosen in DT, decide whether to use HW or SW ECC from
* CS Base Register
*/
- case NAND_ECC_NONE:
+ case NAND_ECC_ENGINE_TYPE_NONE:
/* If CS Base Register selects full hardware ECC then use it */
if ((in_be32(&lbc->bank[priv->bank].br) & BR_DECC) ==
BR_DECC_CHK_GEN) {
@@ -740,23 +740,23 @@ static int fsl_elbc_attach_chip(struct nand_chip *chip)
chip->ecc.write_page = fsl_elbc_write_page;
chip->ecc.write_subpage = fsl_elbc_write_subpage;
- chip->ecc.mode = NAND_ECC_HW;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
mtd_set_ooblayout(mtd, &fsl_elbc_ooblayout_ops);
chip->ecc.size = 512;
chip->ecc.bytes = 3;
chip->ecc.strength = 1;
} else {
/* otherwise fall back to default software ECC */
- chip->ecc.mode = NAND_ECC_SOFT;
- chip->ecc.algo = NAND_ECC_HAMMING;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+ chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
}
break;
/* if SW ECC was chosen in DT, we do not need to set anything here */
- case NAND_ECC_SOFT:
+ case NAND_ECC_ENGINE_TYPE_SOFT:
break;
- /* should we also implement NAND_ECC_HW to do as the code above? */
+ /* should we also implement *_ECC_ENGINE_CONTROLLER to do as above? */
default:
return -EINVAL;
}
@@ -786,8 +786,8 @@ static int fsl_elbc_attach_chip(struct nand_chip *chip)
chip->page_shift);
dev_dbg(priv->dev, "fsl_elbc_init: nand->phys_erase_shift = %d\n",
chip->phys_erase_shift);
- dev_dbg(priv->dev, "fsl_elbc_init: nand->ecc.mode = %d\n",
- chip->ecc.mode);
+ dev_dbg(priv->dev, "fsl_elbc_init: nand->ecc.engine_type = %d\n",
+ chip->ecc.engine_type);
dev_dbg(priv->dev, "fsl_elbc_init: nand->ecc.steps = %d\n",
chip->ecc.steps);
dev_dbg(priv->dev, "fsl_elbc_init: nand->ecc.bytes = %d\n",
diff --git a/drivers/mtd/nand/raw/fsl_ifc_nand.c b/drivers/mtd/nand/raw/fsl_ifc_nand.c
index 00ae7a910b03..0e7a9b64301e 100644
--- a/drivers/mtd/nand/raw/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_ifc_nand.c
@@ -309,7 +309,7 @@ static void fsl_ifc_cmdfunc(struct nand_chip *chip, unsigned int command,
ifc_nand_ctrl->read_bytes = mtd->writesize + mtd->oobsize;
ifc_nand_ctrl->index += column;
- if (chip->ecc.mode == NAND_ECC_HW)
+ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_HOST)
ifc_nand_ctrl->eccread = 1;
fsl_ifc_do_read(chip, 0, mtd);
@@ -724,8 +724,8 @@ static int fsl_ifc_attach_chip(struct nand_chip *chip)
chip->page_shift);
dev_dbg(priv->dev, "%s: nand->phys_erase_shift = %d\n", __func__,
chip->phys_erase_shift);
- dev_dbg(priv->dev, "%s: nand->ecc.mode = %d\n", __func__,
- chip->ecc.mode);
+ dev_dbg(priv->dev, "%s: nand->ecc.engine_type = %d\n", __func__,
+ chip->ecc.engine_type);
dev_dbg(priv->dev, "%s: nand->ecc.steps = %d\n", __func__,
chip->ecc.steps);
dev_dbg(priv->dev, "%s: nand->ecc.bytes = %d\n", __func__,
@@ -912,7 +912,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv)
/* Must also set CSOR_NAND_ECC_ENC_EN if DEC_EN set */
if (csor & CSOR_NAND_ECC_DEC_EN) {
- chip->ecc.mode = NAND_ECC_HW;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
mtd_set_ooblayout(mtd, &fsl_ifc_ooblayout_ops);
/* Hardware generates ECC per 512 Bytes */
@@ -925,8 +925,8 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv)
chip->ecc.strength = 8;
}
} else {
- chip->ecc.mode = NAND_ECC_SOFT;
- chip->ecc.algo = NAND_ECC_HAMMING;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+ chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
}
ret = fsl_ifc_sram_init(priv);
diff --git a/drivers/mtd/nand/raw/fsl_upm.c b/drivers/mtd/nand/raw/fsl_upm.c
index 197850aeb261..d5813b9abc8e 100644
--- a/drivers/mtd/nand/raw/fsl_upm.c
+++ b/drivers/mtd/nand/raw/fsl_upm.c
@@ -47,8 +47,8 @@ static int fun_chip_init(struct fsl_upm_nand *fun,
int ret;
struct device_node *flash_np;
- fun->chip.ecc.mode = NAND_ECC_SOFT;
- fun->chip.ecc.algo = NAND_ECC_HAMMING;
+ fun->chip.ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+ fun->chip.ecc.algo = NAND_ECC_ALGO_HAMMING;
fun->chip.controller = &fun->base;
mtd->dev.parent = fun->dev;
diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c
index 92ddc41d0ff0..4191831df182 100644
--- a/drivers/mtd/nand/raw/fsmc_nand.c
+++ b/drivers/mtd/nand/raw/fsmc_nand.c
@@ -900,8 +900,8 @@ static int fsmc_nand_attach_chip(struct nand_chip *nand)
return 0;
}
- switch (nand->ecc.mode) {
- case NAND_ECC_HW:
+ switch (nand->ecc.engine_type) {
+ case NAND_ECC_ENGINE_TYPE_ON_HOST:
dev_info(host->dev, "Using 1-bit HW ECC scheme\n");
nand->ecc.calculate = fsmc_read_hwecc_ecc1;
nand->ecc.correct = nand_correct_data;
@@ -910,14 +910,14 @@ static int fsmc_nand_attach_chip(struct nand_chip *nand)
nand->ecc.options |= NAND_ECC_SOFT_HAMMING_SM_ORDER;
break;
- case NAND_ECC_SOFT:
- if (nand->ecc.algo == NAND_ECC_BCH) {
+ case NAND_ECC_ENGINE_TYPE_SOFT:
+ if (nand->ecc.algo == NAND_ECC_ALGO_BCH) {
dev_info(host->dev,
"Using 4-bit SW BCH ECC scheme\n");
break;
}
- case NAND_ECC_ON_DIE:
+ case NAND_ECC_ENGINE_TYPE_ON_DIE:
break;
default:
@@ -929,7 +929,7 @@ static int fsmc_nand_attach_chip(struct nand_chip *nand)
* Don't set layout for BCH4 SW ECC. This will be
* generated later in nand_bch_init() later.
*/
- if (nand->ecc.mode == NAND_ECC_HW) {
+ if (nand->ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_HOST) {
switch (mtd->oobsize) {
case 16:
case 64:
@@ -1059,7 +1059,7 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
* Setup default ECC mode. nand_dt_init() called from nand_scan_ident()
* can overwrite this value if the DT provides a different value.
*/
- nand->ecc.mode = NAND_ECC_HW;
+ nand->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
nand->ecc.hwctl = fsmc_enable_hwecc;
nand->ecc.size = 512;
nand->badblockbits = 7;
diff --git a/drivers/mtd/nand/raw/gpio.c b/drivers/mtd/nand/raw/gpio.c
index 3bd847ccc3f3..4ec0a1e10867 100644
--- a/drivers/mtd/nand/raw/gpio.c
+++ b/drivers/mtd/nand/raw/gpio.c
@@ -342,8 +342,8 @@ static int gpio_nand_probe(struct platform_device *pdev)
gpiomtd->base.ops = &gpio_nand_ops;
nand_set_flash_node(chip, pdev->dev.of_node);
- chip->ecc.mode = NAND_ECC_SOFT;
- chip->ecc.algo = NAND_ECC_HAMMING;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+ chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
chip->options = gpiomtd->plat.options;
chip->controller = &gpiomtd->base;
diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
index 5d4aee46cc55..dc8104e67506 100644
--- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
@@ -272,8 +272,8 @@ static int set_geometry_by_ecc_info(struct gpmi_nand_data *this,
default:
dev_err(this->dev,
"unsupported nand chip. ecc bits : %d, ecc size : %d\n",
- chip->base.eccreq.strength,
- chip->base.eccreq.step_size);
+ nanddev_get_ecc_requirements(&chip->base)->strength,
+ nanddev_get_ecc_requirements(&chip->base)->step_size);
return -EINVAL;
}
geo->ecc_chunk_size = ecc_step;
@@ -510,6 +510,8 @@ static int legacy_set_geometry(struct gpmi_nand_data *this)
static int common_nfc_set_geometry(struct gpmi_nand_data *this)
{
struct nand_chip *chip = &this->nand;
+ const struct nand_ecc_props *requirements =
+ nanddev_get_ecc_requirements(&chip->base);
if (chip->ecc.strength > 0 && chip->ecc.size > 0)
return set_geometry_by_ecc_info(this, chip->ecc.strength,
@@ -517,13 +519,12 @@ static int common_nfc_set_geometry(struct gpmi_nand_data *this)
if ((of_property_read_bool(this->dev->of_node, "fsl,use-minimum-ecc"))
|| legacy_set_geometry(this)) {
- if (!(chip->base.eccreq.strength > 0 &&
- chip->base.eccreq.step_size > 0))
+ if (!(requirements->strength > 0 && requirements->step_size > 0))
return -EINVAL;
return set_geometry_by_ecc_info(this,
- chip->base.eccreq.strength,
- chip->base.eccreq.step_size);
+ requirements->strength,
+ requirements->step_size);
}
return 0;
@@ -1003,10 +1004,8 @@ static int acquire_dma_channels(struct gpmi_nand_data *this)
/* request dma channel */
dma_chan = dma_request_chan(&pdev->dev, "rx-tx");
if (IS_ERR(dma_chan)) {
- ret = PTR_ERR(dma_chan);
- if (ret != -EPROBE_DEFER)
- dev_err(this->dev, "DMA channel request failed: %d\n",
- ret);
+ ret = dev_err_probe(this->dev, PTR_ERR(dma_chan),
+ "DMA channel request failed\n");
release_dma_channels(this);
} else {
this->dma_chans[0] = dma_chan;
@@ -2032,7 +2031,7 @@ static int gpmi_init_last(struct gpmi_nand_data *this)
ecc->write_page_raw = gpmi_ecc_write_page_raw;
ecc->read_oob_raw = gpmi_ecc_read_oob_raw;
ecc->write_oob_raw = gpmi_ecc_write_oob_raw;
- ecc->mode = NAND_ECC_HW;
+ ecc->engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
ecc->size = bch_geo->ecc_chunk_size;
ecc->strength = bch_geo->ecc_strength;
mtd_set_ooblayout(mtd, &gpmi_ooblayout_ops);
diff --git a/drivers/mtd/nand/raw/hisi504_nand.c b/drivers/mtd/nand/raw/hisi504_nand.c
index b84238e2268a..8b2122ce6ec3 100644
--- a/drivers/mtd/nand/raw/hisi504_nand.c
+++ b/drivers/mtd/nand/raw/hisi504_nand.c
@@ -186,7 +186,7 @@ static void hisi_nfc_dma_transfer(struct hinfc_host *host, int todev)
hinfc_write(host, host->dma_buffer, HINFC504_DMA_ADDR_DATA);
hinfc_write(host, host->dma_oob, HINFC504_DMA_ADDR_OOB);
- if (chip->ecc.mode == NAND_ECC_NONE) {
+ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_NONE) {
hinfc_write(host, ((mtd->oobsize & HINFC504_DMA_LEN_OOB_MASK)
<< HINFC504_DMA_LEN_OOB_SHIFT), HINFC504_DMA_LEN);
@@ -468,7 +468,7 @@ static void hisi_nfc_cmdfunc(struct nand_chip *chip, unsigned command,
case NAND_CMD_STATUS:
flag = hinfc_read(host, HINFC504_CON);
- if (chip->ecc.mode == NAND_ECC_HW)
+ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_HOST)
hinfc_write(host,
flag & ~(HINFC504_CON_ECCTYPE_MASK <<
HINFC504_CON_ECCTYPE_SHIFT), HINFC504_CON);
@@ -721,7 +721,7 @@ static int hisi_nfc_attach_chip(struct nand_chip *chip)
}
hinfc_write(host, flag, HINFC504_CON);
- if (chip->ecc.mode == NAND_ECC_HW)
+ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_HOST)
hisi_nfc_ecc_probe(host);
return 0;
diff --git a/drivers/mtd/nand/raw/ingenic/ingenic_nand_drv.c b/drivers/mtd/nand/raw/ingenic/ingenic_nand_drv.c
index 69423bb29adb..0e9d426fe4f2 100644
--- a/drivers/mtd/nand/raw/ingenic/ingenic_nand_drv.c
+++ b/drivers/mtd/nand/raw/ingenic/ingenic_nand_drv.c
@@ -194,8 +194,8 @@ static int ingenic_nand_attach_chip(struct nand_chip *chip)
(chip->ecc.strength / 8);
}
- switch (chip->ecc.mode) {
- case NAND_ECC_HW:
+ switch (chip->ecc.engine_type) {
+ case NAND_ECC_ENGINE_TYPE_ON_HOST:
if (!nfc->ecc) {
dev_err(nfc->dev, "HW ECC selected, but ECC controller not found\n");
return -ENODEV;
@@ -205,22 +205,22 @@ static int ingenic_nand_attach_chip(struct nand_chip *chip)
chip->ecc.calculate = ingenic_nand_ecc_calculate;
chip->ecc.correct = ingenic_nand_ecc_correct;
fallthrough;
- case NAND_ECC_SOFT:
+ case NAND_ECC_ENGINE_TYPE_SOFT:
dev_info(nfc->dev, "using %s (strength %d, size %d, bytes %d)\n",
(nfc->ecc) ? "hardware ECC" : "software ECC",
chip->ecc.strength, chip->ecc.size, chip->ecc.bytes);
break;
- case NAND_ECC_NONE:
+ case NAND_ECC_ENGINE_TYPE_NONE:
dev_info(nfc->dev, "not using ECC\n");
break;
default:
dev_err(nfc->dev, "ECC mode %d not supported\n",
- chip->ecc.mode);
+ chip->ecc.engine_type);
return -EINVAL;
}
/* The NAND core will generate the ECC layout for SW ECC */
- if (chip->ecc.mode != NAND_ECC_HW)
+ if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST)
return 0;
/* Generate ECC layout. ECC codes are right aligned in the OOB area. */
@@ -243,8 +243,10 @@ static int ingenic_nand_attach_chip(struct nand_chip *chip)
/* For legacy reasons we use a different layout on the qi,lb60 board. */
if (of_machine_is_compatible("qi,lb60"))
mtd_set_ooblayout(mtd, &qi_lb60_ooblayout_ops);
- else
+ else if (nfc->soc_info->oob_layout)
mtd_set_ooblayout(mtd, nfc->soc_info->oob_layout);
+ else
+ mtd_set_ooblayout(mtd, nand_get_large_page_ooblayout());
return 0;
}
@@ -404,7 +406,7 @@ static int ingenic_nand_init_chip(struct platform_device *pdev,
mtd->dev.parent = dev;
chip->options = NAND_NO_SUBPAGE_WRITE;
- chip->ecc.mode = NAND_ECC_HW;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
chip->controller = &nfc->controller;
nand_set_flash_node(chip, np);
@@ -532,7 +534,6 @@ static const struct jz_soc_info jz4740_soc_info = {
.data_offset = 0x00000000,
.cmd_offset = 0x00008000,
.addr_offset = 0x00010000,
- .oob_layout = &nand_ooblayout_lp_ops,
};
static const struct jz_soc_info jz4725b_soc_info = {
@@ -546,7 +547,6 @@ static const struct jz_soc_info jz4780_soc_info = {
.data_offset = 0x00000000,
.cmd_offset = 0x00400000,
.addr_offset = 0x00800000,
- .oob_layout = &nand_ooblayout_lp_ops,
};
static const struct of_device_id ingenic_nand_dt_match[] = {
diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c
index 7521038af2ef..4940bb2e3c07 100644
--- a/drivers/mtd/nand/raw/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c
@@ -656,7 +656,7 @@ static int lpc32xx_nand_attach_chip(struct nand_chip *chip)
if (!host->dummy_buf)
return -ENOMEM;
- chip->ecc.mode = NAND_ECC_HW;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
chip->ecc.size = 512;
mtd_set_ooblayout(mtd, &lpc32xx_ooblayout_ops);
host->mlcsubpages = mtd->writesize / 512;
diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index b151fd000815..6db9d2ed6881 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -881,7 +881,8 @@ static int lpc32xx_nand_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, host);
/* NAND callbacks for LPC32xx SLC hardware */
- chip->ecc.mode = NAND_ECC_HW_SYNDROME;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
+ chip->ecc.placement = NAND_ECC_PLACEMENT_INTERLEAVED;
chip->legacy.read_byte = lpc32xx_nand_read_byte;
chip->legacy.read_buf = lpc32xx_nand_read_buf;
chip->legacy.write_buf = lpc32xx_nand_write_buf;
diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c
index 8482d3bd8b1f..f5ca2002d08e 100644
--- a/drivers/mtd/nand/raw/marvell_nand.c
+++ b/drivers/mtd/nand/raw/marvell_nand.c
@@ -227,6 +227,8 @@
#define XTYPE_MASK 7
/**
+ * struct marvell_hw_ecc_layout - layout of Marvell ECC
+ *
* Marvell ECC engine works differently than the others, in order to limit the
* size of the IP, hardware engineers chose to set a fixed strength at 16 bits
* per subpage, and depending on a the desired strength needed by the NAND chip,
@@ -292,6 +294,8 @@ static const struct marvell_hw_ecc_layout marvell_nfc_layouts[] = {
};
/**
+ * struct marvell_nand_chip_sel - CS line description
+ *
* The Nand Flash Controller has up to 4 CE and 2 RB pins. The CE selection
* is made by a field in NDCB0 register, and in another field in NDCB2 register.
* The datasheet describes the logic with an error: ADDR5 field is once
@@ -312,14 +316,15 @@ struct marvell_nand_chip_sel {
};
/**
- * NAND chip structure: stores NAND chip device related information
+ * struct marvell_nand_chip - stores NAND chip device related information
*
* @chip: Base NAND chip structure
* @node: Used to store NAND chips into a list
- * @layout NAND layout when using hardware ECC
+ * @layout: NAND layout when using hardware ECC
* @ndcr: Controller register value for this NAND chip
* @ndtr0: Timing registers 0 value for this NAND chip
* @ndtr1: Timing registers 1 value for this NAND chip
+ * @addr_cyc: Amount of cycles needed to pass column address
* @selected_die: Current active CS
* @nsels: Number of CS lines required by the NAND chip
* @sels: Array of CS lines descriptions
@@ -349,7 +354,8 @@ static inline struct marvell_nand_chip_sel *to_nand_sel(struct marvell_nand_chip
}
/**
- * NAND controller capabilities for distinction between compatible strings
+ * struct marvell_nfc_caps - NAND controller capabilities for distinction
+ * between compatible strings
*
* @max_cs_nb: Number of Chip Select lines available
* @max_rb_nb: Number of Ready/Busy lines available
@@ -372,7 +378,7 @@ struct marvell_nfc_caps {
};
/**
- * NAND controller structure: stores Marvell NAND controller information
+ * struct marvell_nfc - stores Marvell NAND controller information
*
* @controller: Base controller structure
* @dev: Parent device (used to print error messages)
@@ -383,7 +389,9 @@ struct marvell_nfc_caps {
* @assigned_cs: Bitmask describing already assigned CS lines
* @chips: List containing all the NAND chips attached to
* this NAND controller
+ * @selected_chip: Currently selected target chip
* @caps: NAND controller capabilities for each compatible string
+ * @use_dma: Whetner DMA is used
* @dma_chan: DMA channel (NFCv1 only)
* @dma_buf: 32-bit aligned buffer for DMA transfers (NFCv1 only)
*/
@@ -411,7 +419,8 @@ static inline struct marvell_nfc *to_marvell_nfc(struct nand_controller *ctrl)
}
/**
- * NAND controller timings expressed in NAND Controller clock cycles
+ * struct marvell_nfc_timings - NAND controller timings expressed in NAND
+ * Controller clock cycles
*
* @tRP: ND_nRE pulse width
* @tRH: ND_nRE high duration
@@ -455,8 +464,8 @@ struct marvell_nfc_timings {
period_ns))
/**
- * NAND driver structure filled during the parsing of the ->exec_op() subop
- * subset of instructions.
+ * struct marvell_nfc_op - filled during the parsing of the ->exec_op()
+ * subop subset of instructions.
*
* @ndcb: Array of values written to NDCBx registers
* @cle_ale_delay_ns: Optional delay after the last CMD or ADDR cycle
@@ -685,9 +694,31 @@ static int marvell_nfc_wait_cmdd(struct nand_chip *chip)
return marvell_nfc_end_cmd(chip, cs_flag, "CMDD");
}
+static int marvell_nfc_poll_status(struct marvell_nfc *nfc, u32 mask,
+ u32 expected_val, unsigned long timeout_ms)
+{
+ unsigned long limit;
+ u32 st;
+
+ limit = jiffies + msecs_to_jiffies(timeout_ms);
+ do {
+ st = readl_relaxed(nfc->regs + NDSR);
+ if (st & NDSR_RDY(1))
+ st |= NDSR_RDY(0);
+
+ if ((st & mask) == expected_val)
+ return 0;
+
+ cpu_relax();
+ } while (time_after(limit, jiffies));
+
+ return -ETIMEDOUT;
+}
+
static int marvell_nfc_wait_op(struct nand_chip *chip, unsigned int timeout_ms)
{
struct marvell_nfc *nfc = to_marvell_nfc(chip->controller);
+ struct mtd_info *mtd = nand_to_mtd(chip);
u32 pending;
int ret;
@@ -695,12 +726,18 @@ static int marvell_nfc_wait_op(struct nand_chip *chip, unsigned int timeout_ms)
if (!timeout_ms)
timeout_ms = IRQ_TIMEOUT;
- init_completion(&nfc->complete);
+ if (mtd->oops_panic_write) {
+ ret = marvell_nfc_poll_status(nfc, NDSR_RDY(0),
+ NDSR_RDY(0),
+ timeout_ms);
+ } else {
+ init_completion(&nfc->complete);
- marvell_nfc_enable_int(nfc, NDCR_RDYM);
- ret = wait_for_completion_timeout(&nfc->complete,
- msecs_to_jiffies(timeout_ms));
- marvell_nfc_disable_int(nfc, NDCR_RDYM);
+ marvell_nfc_enable_int(nfc, NDCR_RDYM);
+ ret = wait_for_completion_timeout(&nfc->complete,
+ msecs_to_jiffies(timeout_ms));
+ marvell_nfc_disable_int(nfc, NDCR_RDYM);
+ }
pending = marvell_nfc_clear_int(nfc, NDSR_RDY(0) | NDSR_RDY(1));
/*
@@ -780,7 +817,7 @@ static void marvell_nfc_enable_hw_ecc(struct nand_chip *chip)
* When enabling BCH, set threshold to 0 to always know the
* number of corrected bitflips.
*/
- if (chip->ecc.algo == NAND_ECC_BCH)
+ if (chip->ecc.algo == NAND_ECC_ALGO_BCH)
writel_relaxed(NDECCCTRL_BCH_EN, nfc->regs + NDECCCTRL);
}
}
@@ -792,7 +829,7 @@ static void marvell_nfc_disable_hw_ecc(struct nand_chip *chip)
if (ndcr & NDCR_ECC_EN) {
writel_relaxed(ndcr & ~NDCR_ECC_EN, nfc->regs + NDCR);
- if (chip->ecc.algo == NAND_ECC_BCH)
+ if (chip->ecc.algo == NAND_ECC_ALGO_BCH)
writel_relaxed(0, nfc->regs + NDECCCTRL);
}
}
@@ -966,7 +1003,7 @@ static int marvell_nfc_hw_ecc_check_bitflips(struct nand_chip *chip,
if (ndsr & NDSR_CORERR) {
writel_relaxed(ndsr, nfc->regs + NDSR);
- if (chip->ecc.algo == NAND_ECC_BCH)
+ if (chip->ecc.algo == NAND_ECC_ALGO_BCH)
bf = NDSR_ERRCNT(ndsr);
else
bf = 1;
@@ -2218,7 +2255,7 @@ static int marvell_nand_hw_ecc_controller_init(struct mtd_info *mtd,
ecc->size = l->data_bytes;
if (ecc->strength == 1) {
- chip->ecc.algo = NAND_ECC_HAMMING;
+ chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
ecc->read_page_raw = marvell_nfc_hw_ecc_hmg_read_page_raw;
ecc->read_page = marvell_nfc_hw_ecc_hmg_read_page;
ecc->read_oob_raw = marvell_nfc_hw_ecc_hmg_read_oob_raw;
@@ -2228,7 +2265,7 @@ static int marvell_nand_hw_ecc_controller_init(struct mtd_info *mtd,
ecc->write_oob_raw = marvell_nfc_hw_ecc_hmg_write_oob_raw;
ecc->write_oob = ecc->write_oob_raw;
} else {
- chip->ecc.algo = NAND_ECC_BCH;
+ chip->ecc.algo = NAND_ECC_ALGO_BCH;
ecc->strength = 16;
ecc->read_page_raw = marvell_nfc_hw_ecc_bch_read_page_raw;
ecc->read_page = marvell_nfc_hw_ecc_bch_read_page;
@@ -2247,13 +2284,16 @@ static int marvell_nand_ecc_init(struct mtd_info *mtd,
struct nand_ecc_ctrl *ecc)
{
struct nand_chip *chip = mtd_to_nand(mtd);
+ const struct nand_ecc_props *requirements =
+ nanddev_get_ecc_requirements(&chip->base);
struct marvell_nfc *nfc = to_marvell_nfc(chip->controller);
int ret;
- if (ecc->mode != NAND_ECC_NONE && (!ecc->size || !ecc->strength)) {
- if (chip->base.eccreq.step_size && chip->base.eccreq.strength) {
- ecc->size = chip->base.eccreq.step_size;
- ecc->strength = chip->base.eccreq.strength;
+ if (ecc->engine_type != NAND_ECC_ENGINE_TYPE_NONE &&
+ (!ecc->size || !ecc->strength)) {
+ if (requirements->step_size && requirements->strength) {
+ ecc->size = requirements->step_size;
+ ecc->strength = requirements->strength;
} else {
dev_info(nfc->dev,
"No minimum ECC strength, using 1b/512B\n");
@@ -2262,15 +2302,15 @@ static int marvell_nand_ecc_init(struct mtd_info *mtd,
}
}
- switch (ecc->mode) {
- case NAND_ECC_HW:
+ switch (ecc->engine_type) {
+ case NAND_ECC_ENGINE_TYPE_ON_HOST:
ret = marvell_nand_hw_ecc_controller_init(mtd, ecc);
if (ret)
return ret;
break;
- case NAND_ECC_NONE:
- case NAND_ECC_SOFT:
- case NAND_ECC_ON_DIE:
+ case NAND_ECC_ENGINE_TYPE_NONE:
+ case NAND_ECC_ENGINE_TYPE_SOFT:
+ case NAND_ECC_ENGINE_TYPE_ON_DIE:
if (!nfc->caps->is_nfcv2 && mtd->writesize != SZ_512 &&
mtd->writesize != SZ_2K) {
dev_err(nfc->dev, "NFCv1 cannot write %d bytes pages\n",
@@ -2467,7 +2507,7 @@ static int marvell_nand_attach_chip(struct nand_chip *chip)
return ret;
}
- if (chip->ecc.mode == NAND_ECC_HW) {
+ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_HOST) {
/*
* Subpage write not available with hardware ECC, prohibit also
* subpage read as in userspace subpage access would still be
@@ -2642,7 +2682,7 @@ static int marvell_nand_chip_init(struct device *dev, struct marvell_nfc *nfc,
* Default to HW ECC engine mode. If the nand-ecc-mode property is given
* in the DT node, this entry will be overwritten in nand_scan_ident().
*/
- chip->ecc.mode = NAND_ECC_HW;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
/*
* Save a reference value for timing registers before
@@ -2759,10 +2799,7 @@ static int marvell_nfc_init_dma(struct marvell_nfc *nfc)
if (IS_ERR(nfc->dma_chan)) {
ret = PTR_ERR(nfc->dma_chan);
nfc->dma_chan = NULL;
- if (ret != -EPROBE_DEFER)
- dev_err(nfc->dev, "DMA channel request failed: %d\n",
- ret);
- return ret;
+ return dev_err_probe(nfc->dev, ret, "DMA channel request failed\n");
}
r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c
index 0e5829a2b54f..48e6dac96be6 100644
--- a/drivers/mtd/nand/raw/meson_nand.c
+++ b/drivers/mtd/nand/raw/meson_nand.c
@@ -1197,7 +1197,7 @@ static int meson_nand_attach_chip(struct nand_chip *nand)
if (ret)
return -EINVAL;
- nand->ecc.mode = NAND_ECC_HW;
+ nand->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
nand->ecc.write_page_raw = meson_nfc_write_page_raw;
nand->ecc.write_page = meson_nfc_write_page_hwecc;
nand->ecc.write_oob_raw = nand_write_oob_std;
diff --git a/drivers/mtd/nand/raw/mpc5121_nfc.c b/drivers/mtd/nand/raw/mpc5121_nfc.c
index 18ecb096a32d..dfd0d3ed5ed0 100644
--- a/drivers/mtd/nand/raw/mpc5121_nfc.c
+++ b/drivers/mtd/nand/raw/mpc5121_nfc.c
@@ -688,8 +688,8 @@ static int mpc5121_nfc_probe(struct platform_device *op)
chip->legacy.set_features = nand_get_set_features_notsupp;
chip->legacy.get_features = nand_get_set_features_notsupp;
chip->bbt_options = NAND_BBT_USE_FLASH;
- chip->ecc.mode = NAND_ECC_SOFT;
- chip->ecc.algo = NAND_ECC_HAMMING;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+ chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
/* Support external chip-select logic on ADS5121 board */
if (of_machine_is_compatible("fsl,mpc5121ads")) {
diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c
index ad1b55dab211..57f1f1708994 100644
--- a/drivers/mtd/nand/raw/mtk_nand.c
+++ b/drivers/mtd/nand/raw/mtk_nand.c
@@ -1253,21 +1253,23 @@ static int mtk_nfc_set_spare_per_sector(u32 *sps, struct mtd_info *mtd)
static int mtk_nfc_ecc_init(struct device *dev, struct mtd_info *mtd)
{
struct nand_chip *nand = mtd_to_nand(mtd);
+ const struct nand_ecc_props *requirements =
+ nanddev_get_ecc_requirements(&nand->base);
struct mtk_nfc *nfc = nand_get_controller_data(nand);
u32 spare;
int free, ret;
/* support only ecc hw mode */
- if (nand->ecc.mode != NAND_ECC_HW) {
- dev_err(dev, "ecc.mode not supported\n");
+ if (nand->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST) {
+ dev_err(dev, "ecc.engine_type not supported\n");
return -EINVAL;
}
/* if optional dt settings not present */
if (!nand->ecc.size || !nand->ecc.strength) {
/* use datasheet requirements */
- nand->ecc.strength = nand->base.eccreq.strength;
- nand->ecc.size = nand->base.eccreq.step_size;
+ nand->ecc.strength = requirements->strength;
+ nand->ecc.size = requirements->step_size;
/*
* align eccstrength and eccsize
@@ -1416,7 +1418,7 @@ static int mtk_nfc_nand_chip_init(struct device *dev, struct mtk_nfc *nfc,
nand->options |= NAND_USES_DMA | NAND_SUBPAGE_READ;
/* set default mode in case dt entry is missing */
- nand->ecc.mode = NAND_ECC_HW;
+ nand->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
nand->ecc.write_subpage = mtk_nfc_write_subpage_hwecc;
nand->ecc.write_page_raw = mtk_nfc_write_page_raw;
diff --git a/drivers/mtd/nand/raw/mxc_nand.c b/drivers/mtd/nand/raw/mxc_nand.c
index a043d76b48cb..d4200eb2ad32 100644
--- a/drivers/mtd/nand/raw/mxc_nand.c
+++ b/drivers/mtd/nand/raw/mxc_nand.c
@@ -669,7 +669,7 @@ static void mxc_nand_enable_hwecc_v1_v2(struct nand_chip *chip, bool enable)
struct mxc_nand_host *host = nand_get_controller_data(chip);
uint16_t config1;
- if (chip->ecc.mode != NAND_ECC_HW)
+ if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST)
return;
config1 = readw(NFC_V1_V2_CONFIG1);
@@ -687,7 +687,7 @@ static void mxc_nand_enable_hwecc_v3(struct nand_chip *chip, bool enable)
struct mxc_nand_host *host = nand_get_controller_data(chip);
uint32_t config2;
- if (chip->ecc.mode != NAND_ECC_HW)
+ if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST)
return;
config2 = readl(NFC_V3_CONFIG2);
@@ -1117,7 +1117,8 @@ static void preset_v1(struct mtd_info *mtd)
struct mxc_nand_host *host = nand_get_controller_data(nand_chip);
uint16_t config1 = 0;
- if (nand_chip->ecc.mode == NAND_ECC_HW && mtd->writesize)
+ if (nand_chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_HOST &&
+ mtd->writesize)
config1 |= NFC_V1_V2_CONFIG1_ECC_EN;
if (!host->devtype_data->irqpending_quirk)
@@ -1227,7 +1228,7 @@ static void preset_v2(struct mtd_info *mtd)
if (mtd->writesize) {
uint16_t pages_per_block = mtd->erasesize / mtd->writesize;
- if (nand_chip->ecc.mode == NAND_ECC_HW)
+ if (nand_chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_HOST)
config1 |= NFC_V1_V2_CONFIG1_ECC_EN;
host->eccsize = get_eccsize(mtd);
@@ -1303,7 +1304,7 @@ static void preset_v3(struct mtd_info *mtd)
}
if (mtd->writesize) {
- if (chip->ecc.mode == NAND_ECC_HW)
+ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_HOST)
config2 |= NFC_V3_CONFIG2_ECC_EN;
config2 |= NFC_V3_CONFIG2_PPB(
@@ -1680,8 +1681,8 @@ static int mxcnd_attach_chip(struct nand_chip *chip)
struct mxc_nand_host *host = nand_get_controller_data(chip);
struct device *dev = mtd->dev.parent;
- switch (chip->ecc.mode) {
- case NAND_ECC_HW:
+ switch (chip->ecc.engine_type) {
+ case NAND_ECC_ENGINE_TYPE_ON_HOST:
chip->ecc.read_page = mxc_nand_read_page;
chip->ecc.read_page_raw = mxc_nand_read_page_raw;
chip->ecc.read_oob = mxc_nand_read_oob;
@@ -1690,7 +1691,7 @@ static int mxcnd_attach_chip(struct nand_chip *chip)
chip->ecc.write_oob = mxc_nand_write_oob;
break;
- case NAND_ECC_SOFT:
+ case NAND_ECC_ENGINE_TYPE_SOFT:
break;
default:
@@ -1728,7 +1729,7 @@ static int mxcnd_attach_chip(struct nand_chip *chip)
*/
host->used_oobsize = min(mtd->oobsize, 218U);
- if (chip->ecc.mode == NAND_ECC_HW) {
+ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_HOST) {
if (is_imx21_nfc(host) || is_imx27_nfc(host))
chip->ecc.strength = 1;
else
@@ -1843,10 +1844,10 @@ static int mxcnd_probe(struct platform_device *pdev)
mtd_set_ooblayout(mtd, host->devtype_data->ooblayout);
if (host->pdata.hw_ecc) {
- this->ecc.mode = NAND_ECC_HW;
+ this->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
} else {
- this->ecc.mode = NAND_ECC_SOFT;
- this->ecc.algo = NAND_ECC_HAMMING;
+ this->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+ this->ecc.algo = NAND_ECC_ALGO_HAMMING;
}
/* NAND bus width determines access functions used by upper layer */
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 0c768cb88f96..1f0d542d5923 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -34,6 +34,7 @@
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
#include <linux/mtd/nand_ecc.h>
#include <linux/mtd/nand_bch.h>
#include <linux/interrupt.h>
@@ -45,166 +46,6 @@
#include "internals.h"
-/* Define default oob placement schemes for large and small page devices */
-static int nand_ooblayout_ecc_sp(struct mtd_info *mtd, int section,
- struct mtd_oob_region *oobregion)
-{
- struct nand_chip *chip = mtd_to_nand(mtd);
- struct nand_ecc_ctrl *ecc = &chip->ecc;
-
- if (section > 1)
- return -ERANGE;
-
- if (!section) {
- oobregion->offset = 0;
- if (mtd->oobsize == 16)
- oobregion->length = 4;
- else
- oobregion->length = 3;
- } else {
- if (mtd->oobsize == 8)
- return -ERANGE;
-
- oobregion->offset = 6;
- oobregion->length = ecc->total - 4;
- }
-
- return 0;
-}
-
-static int nand_ooblayout_free_sp(struct mtd_info *mtd, int section,
- struct mtd_oob_region *oobregion)
-{
- if (section > 1)
- return -ERANGE;
-
- if (mtd->oobsize == 16) {
- if (section)
- return -ERANGE;
-
- oobregion->length = 8;
- oobregion->offset = 8;
- } else {
- oobregion->length = 2;
- if (!section)
- oobregion->offset = 3;
- else
- oobregion->offset = 6;
- }
-
- return 0;
-}
-
-const struct mtd_ooblayout_ops nand_ooblayout_sp_ops = {
- .ecc = nand_ooblayout_ecc_sp,
- .free = nand_ooblayout_free_sp,
-};
-EXPORT_SYMBOL_GPL(nand_ooblayout_sp_ops);
-
-static int nand_ooblayout_ecc_lp(struct mtd_info *mtd, int section,
- struct mtd_oob_region *oobregion)
-{
- struct nand_chip *chip = mtd_to_nand(mtd);
- struct nand_ecc_ctrl *ecc = &chip->ecc;
-
- if (section || !ecc->total)
- return -ERANGE;
-
- oobregion->length = ecc->total;
- oobregion->offset = mtd->oobsize - oobregion->length;
-
- return 0;
-}
-
-static int nand_ooblayout_free_lp(struct mtd_info *mtd, int section,
- struct mtd_oob_region *oobregion)
-{
- struct nand_chip *chip = mtd_to_nand(mtd);
- struct nand_ecc_ctrl *ecc = &chip->ecc;
-
- if (section)
- return -ERANGE;
-
- oobregion->length = mtd->oobsize - ecc->total - 2;
- oobregion->offset = 2;
-
- return 0;
-}
-
-const struct mtd_ooblayout_ops nand_ooblayout_lp_ops = {
- .ecc = nand_ooblayout_ecc_lp,
- .free = nand_ooblayout_free_lp,
-};
-EXPORT_SYMBOL_GPL(nand_ooblayout_lp_ops);
-
-/*
- * Support the old "large page" layout used for 1-bit Hamming ECC where ECC
- * are placed at a fixed offset.
- */
-static int nand_ooblayout_ecc_lp_hamming(struct mtd_info *mtd, int section,
- struct mtd_oob_region *oobregion)
-{
- struct nand_chip *chip = mtd_to_nand(mtd);
- struct nand_ecc_ctrl *ecc = &chip->ecc;
-
- if (section)
- return -ERANGE;
-
- switch (mtd->oobsize) {
- case 64:
- oobregion->offset = 40;
- break;
- case 128:
- oobregion->offset = 80;
- break;
- default:
- return -EINVAL;
- }
-
- oobregion->length = ecc->total;
- if (oobregion->offset + oobregion->length > mtd->oobsize)
- return -ERANGE;
-
- return 0;
-}
-
-static int nand_ooblayout_free_lp_hamming(struct mtd_info *mtd, int section,
- struct mtd_oob_region *oobregion)
-{
- struct nand_chip *chip = mtd_to_nand(mtd);
- struct nand_ecc_ctrl *ecc = &chip->ecc;
- int ecc_offset = 0;
-
- if (section < 0 || section > 1)
- return -ERANGE;
-
- switch (mtd->oobsize) {
- case 64:
- ecc_offset = 40;
- break;
- case 128:
- ecc_offset = 80;
- break;
- default:
- return -EINVAL;
- }
-
- if (section == 0) {
- oobregion->offset = 2;
- oobregion->length = ecc_offset - 2;
- } else {
- oobregion->offset = ecc_offset + ecc->total;
- oobregion->length = mtd->oobsize - oobregion->offset;
- }
-
- return 0;
-}
-
-static const struct mtd_ooblayout_ops nand_ooblayout_lp_hamming_ops = {
- .ecc = nand_ooblayout_ecc_lp_hamming,
- .free = nand_ooblayout_free_lp_hamming,
-};
-
static int nand_pairing_dist3_get_info(struct mtd_info *mtd, int page,
struct mtd_pairing_info *info)
{
@@ -4750,6 +4591,8 @@ static inline bool is_full_id_nand(struct nand_flash_dev *type)
static bool find_full_id_nand(struct nand_chip *chip,
struct nand_flash_dev *type)
{
+ struct nand_device *base = &chip->base;
+ struct nand_ecc_props requirements;
struct mtd_info *mtd = nand_to_mtd(chip);
struct nand_memory_organization *memorg;
u8 *id_data = chip->id.data;
@@ -4771,8 +4614,9 @@ static bool find_full_id_nand(struct nand_chip *chip,
memorg->pagesize *
memorg->pages_per_eraseblock);
chip->options |= type->options;
- chip->base.eccreq.strength = NAND_ECC_STRENGTH(type);
- chip->base.eccreq.step_size = NAND_ECC_STEP(type);
+ requirements.strength = NAND_ECC_STRENGTH(type);
+ requirements.step_size = NAND_ECC_STEP(type);
+ nanddev_set_ecc_requirements(base, &requirements);
chip->parameters.model = kstrdup(type->name, GFP_KERNEL);
if (!chip->parameters.model)
@@ -5033,91 +4877,101 @@ free_detect_allocation:
return ret;
}
-static const char * const nand_ecc_modes[] = {
- [NAND_ECC_NONE] = "none",
- [NAND_ECC_SOFT] = "soft",
- [NAND_ECC_HW] = "hw",
- [NAND_ECC_HW_SYNDROME] = "hw_syndrome",
- [NAND_ECC_ON_DIE] = "on-die",
-};
-
-static int of_get_nand_ecc_mode(struct device_node *np)
+static enum nand_ecc_engine_type
+of_get_rawnand_ecc_engine_type_legacy(struct device_node *np)
{
+ enum nand_ecc_legacy_mode {
+ NAND_ECC_INVALID,
+ NAND_ECC_NONE,
+ NAND_ECC_SOFT,
+ NAND_ECC_SOFT_BCH,
+ NAND_ECC_HW,
+ NAND_ECC_HW_SYNDROME,
+ NAND_ECC_ON_DIE,
+ };
+ const char * const nand_ecc_legacy_modes[] = {
+ [NAND_ECC_NONE] = "none",
+ [NAND_ECC_SOFT] = "soft",
+ [NAND_ECC_SOFT_BCH] = "soft_bch",
+ [NAND_ECC_HW] = "hw",
+ [NAND_ECC_HW_SYNDROME] = "hw_syndrome",
+ [NAND_ECC_ON_DIE] = "on-die",
+ };
+ enum nand_ecc_legacy_mode eng_type;
const char *pm;
- int err, i;
+ int err;
err = of_property_read_string(np, "nand-ecc-mode", &pm);
- if (err < 0)
- return err;
-
- for (i = NAND_ECC_NONE; i < ARRAY_SIZE(nand_ecc_modes); i++)
- if (!strcasecmp(pm, nand_ecc_modes[i]))
- return i;
-
- /*
- * For backward compatibility we support few obsoleted values that don't
- * have their mappings into the nand_ecc_mode enum anymore (they were
- * merged with other enums).
- */
- if (!strcasecmp(pm, "soft_bch"))
- return NAND_ECC_SOFT;
+ if (err)
+ return NAND_ECC_ENGINE_TYPE_INVALID;
+
+ for (eng_type = NAND_ECC_NONE;
+ eng_type < ARRAY_SIZE(nand_ecc_legacy_modes); eng_type++) {
+ if (!strcasecmp(pm, nand_ecc_legacy_modes[eng_type])) {
+ switch (eng_type) {
+ case NAND_ECC_NONE:
+ return NAND_ECC_ENGINE_TYPE_NONE;
+ case NAND_ECC_SOFT:
+ case NAND_ECC_SOFT_BCH:
+ return NAND_ECC_ENGINE_TYPE_SOFT;
+ case NAND_ECC_HW:
+ case NAND_ECC_HW_SYNDROME:
+ return NAND_ECC_ENGINE_TYPE_ON_HOST;
+ case NAND_ECC_ON_DIE:
+ return NAND_ECC_ENGINE_TYPE_ON_DIE;
+ default:
+ break;
+ }
+ }
+ }
- return -ENODEV;
+ return NAND_ECC_ENGINE_TYPE_INVALID;
}
-static const char * const nand_ecc_algos[] = {
- [NAND_ECC_HAMMING] = "hamming",
- [NAND_ECC_BCH] = "bch",
- [NAND_ECC_RS] = "rs",
-};
-
-static enum nand_ecc_algo of_get_nand_ecc_algo(struct device_node *np)
+static enum nand_ecc_placement
+of_get_rawnand_ecc_placement_legacy(struct device_node *np)
{
- enum nand_ecc_algo ecc_algo;
const char *pm;
int err;
- err = of_property_read_string(np, "nand-ecc-algo", &pm);
+ err = of_property_read_string(np, "nand-ecc-mode", &pm);
if (!err) {
- for (ecc_algo = NAND_ECC_HAMMING;
- ecc_algo < ARRAY_SIZE(nand_ecc_algos);
- ecc_algo++) {
- if (!strcasecmp(pm, nand_ecc_algos[ecc_algo]))
- return ecc_algo;
- }
+ if (!strcasecmp(pm, "hw_syndrome"))
+ return NAND_ECC_PLACEMENT_INTERLEAVED;
}
- /*
- * For backward compatibility we also read "nand-ecc-mode" checking
- * for some obsoleted values that were specifying ECC algorithm.
- */
+ return NAND_ECC_PLACEMENT_UNKNOWN;
+}
+
+static enum nand_ecc_algo of_get_rawnand_ecc_algo_legacy(struct device_node *np)
+{
+ const char *pm;
+ int err;
+
err = of_property_read_string(np, "nand-ecc-mode", &pm);
if (!err) {
if (!strcasecmp(pm, "soft"))
- return NAND_ECC_HAMMING;
+ return NAND_ECC_ALGO_HAMMING;
else if (!strcasecmp(pm, "soft_bch"))
- return NAND_ECC_BCH;
+ return NAND_ECC_ALGO_BCH;
}
- return NAND_ECC_UNKNOWN;
+ return NAND_ECC_ALGO_UNKNOWN;
}
-static int of_get_nand_ecc_step_size(struct device_node *np)
+static void of_get_nand_ecc_legacy_user_config(struct nand_chip *chip)
{
- int ret;
- u32 val;
+ struct device_node *dn = nand_get_flash_node(chip);
+ struct nand_ecc_props *user_conf = &chip->base.ecc.user_conf;
- ret = of_property_read_u32(np, "nand-ecc-step-size", &val);
- return ret ? ret : val;
-}
+ if (user_conf->engine_type == NAND_ECC_ENGINE_TYPE_INVALID)
+ user_conf->engine_type = of_get_rawnand_ecc_engine_type_legacy(dn);
-static int of_get_nand_ecc_strength(struct device_node *np)
-{
- int ret;
- u32 val;
+ if (user_conf->algo == NAND_ECC_ALGO_UNKNOWN)
+ user_conf->algo = of_get_rawnand_ecc_algo_legacy(dn);
- ret = of_property_read_u32(np, "nand-ecc-strength", &val);
- return ret ? ret : val;
+ if (user_conf->placement == NAND_ECC_PLACEMENT_UNKNOWN)
+ user_conf->placement = of_get_rawnand_ecc_placement_legacy(dn);
}
static int of_get_nand_bus_width(struct device_node *np)
@@ -5141,11 +4995,10 @@ static bool of_get_nand_on_flash_bbt(struct device_node *np)
return of_property_read_bool(np, "nand-on-flash-bbt");
}
-static int nand_dt_init(struct nand_chip *chip)
+static int rawnand_dt_init(struct nand_chip *chip)
{
+ struct nand_device *nand = mtd_to_nanddev(nand_to_mtd(chip));
struct device_node *dn = nand_get_flash_node(chip);
- enum nand_ecc_algo ecc_algo;
- int ecc_mode, ecc_strength, ecc_step;
if (!dn)
return 0;
@@ -5159,25 +5012,29 @@ static int nand_dt_init(struct nand_chip *chip)
if (of_get_nand_on_flash_bbt(dn))
chip->bbt_options |= NAND_BBT_USE_FLASH;
- ecc_mode = of_get_nand_ecc_mode(dn);
- ecc_algo = of_get_nand_ecc_algo(dn);
- ecc_strength = of_get_nand_ecc_strength(dn);
- ecc_step = of_get_nand_ecc_step_size(dn);
-
- if (ecc_mode >= 0)
- chip->ecc.mode = ecc_mode;
+ of_get_nand_ecc_user_config(nand);
+ of_get_nand_ecc_legacy_user_config(chip);
- if (ecc_algo != NAND_ECC_UNKNOWN)
- chip->ecc.algo = ecc_algo;
-
- if (ecc_strength >= 0)
- chip->ecc.strength = ecc_strength;
+ /*
+ * If neither the user nor the NAND controller have requested a specific
+ * ECC engine type, we will default to NAND_ECC_ENGINE_TYPE_ON_HOST.
+ */
+ nand->ecc.defaults.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
- if (ecc_step > 0)
- chip->ecc.size = ecc_step;
+ /*
+ * Use the user requested engine type, unless there is none, in this
+ * case default to the NAND controller choice, otherwise fallback to
+ * the raw NAND default one.
+ */
+ if (nand->ecc.user_conf.engine_type != NAND_ECC_ENGINE_TYPE_INVALID)
+ chip->ecc.engine_type = nand->ecc.user_conf.engine_type;
+ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_INVALID)
+ chip->ecc.engine_type = nand->ecc.defaults.engine_type;
- if (of_property_read_bool(dn, "nand-ecc-maximize"))
- chip->ecc.options |= NAND_ECC_MAXIMIZE;
+ chip->ecc.placement = nand->ecc.user_conf.placement;
+ chip->ecc.algo = nand->ecc.user_conf.algo;
+ chip->ecc.strength = nand->ecc.user_conf.strength;
+ chip->ecc.size = nand->ecc.user_conf.step_size;
return 0;
}
@@ -5215,7 +5072,7 @@ static int nand_scan_ident(struct nand_chip *chip, unsigned int maxchips,
/* Enforce the right timings for reset/detection */
chip->current_interface_config = nand_get_reset_interface_config();
- ret = nand_dt_init(chip);
+ ret = rawnand_dt_init(chip);
if (ret)
return ret;
@@ -5282,16 +5139,76 @@ static void nand_scan_ident_cleanup(struct nand_chip *chip)
kfree(chip->parameters.onfi);
}
+static int nand_set_ecc_on_host_ops(struct nand_chip *chip)
+{
+ struct nand_ecc_ctrl *ecc = &chip->ecc;
+
+ switch (ecc->placement) {
+ case NAND_ECC_PLACEMENT_UNKNOWN:
+ case NAND_ECC_PLACEMENT_OOB:
+ /* Use standard hwecc read page function? */
+ if (!ecc->read_page)
+ ecc->read_page = nand_read_page_hwecc;
+ if (!ecc->write_page)
+ ecc->write_page = nand_write_page_hwecc;
+ if (!ecc->read_page_raw)
+ ecc->read_page_raw = nand_read_page_raw;
+ if (!ecc->write_page_raw)
+ ecc->write_page_raw = nand_write_page_raw;
+ if (!ecc->read_oob)
+ ecc->read_oob = nand_read_oob_std;
+ if (!ecc->write_oob)
+ ecc->write_oob = nand_write_oob_std;
+ if (!ecc->read_subpage)
+ ecc->read_subpage = nand_read_subpage;
+ if (!ecc->write_subpage && ecc->hwctl && ecc->calculate)
+ ecc->write_subpage = nand_write_subpage_hwecc;
+ fallthrough;
+
+ case NAND_ECC_PLACEMENT_INTERLEAVED:
+ if ((!ecc->calculate || !ecc->correct || !ecc->hwctl) &&
+ (!ecc->read_page ||
+ ecc->read_page == nand_read_page_hwecc ||
+ !ecc->write_page ||
+ ecc->write_page == nand_write_page_hwecc)) {
+ WARN(1, "No ECC functions supplied; hardware ECC not possible\n");
+ return -EINVAL;
+ }
+ /* Use standard syndrome read/write page function? */
+ if (!ecc->read_page)
+ ecc->read_page = nand_read_page_syndrome;
+ if (!ecc->write_page)
+ ecc->write_page = nand_write_page_syndrome;
+ if (!ecc->read_page_raw)
+ ecc->read_page_raw = nand_read_page_raw_syndrome;
+ if (!ecc->write_page_raw)
+ ecc->write_page_raw = nand_write_page_raw_syndrome;
+ if (!ecc->read_oob)
+ ecc->read_oob = nand_read_oob_syndrome;
+ if (!ecc->write_oob)
+ ecc->write_oob = nand_write_oob_syndrome;
+ break;
+
+ default:
+ pr_warn("Invalid NAND_ECC_PLACEMENT %d\n",
+ ecc->placement);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int nand_set_ecc_soft_ops(struct nand_chip *chip)
{
struct mtd_info *mtd = nand_to_mtd(chip);
+ struct nand_device *nanddev = mtd_to_nanddev(mtd);
struct nand_ecc_ctrl *ecc = &chip->ecc;
- if (WARN_ON(ecc->mode != NAND_ECC_SOFT))
+ if (WARN_ON(ecc->engine_type != NAND_ECC_ENGINE_TYPE_SOFT))
return -EINVAL;
switch (ecc->algo) {
- case NAND_ECC_HAMMING:
+ case NAND_ECC_ALGO_HAMMING:
ecc->calculate = nand_calculate_ecc;
ecc->correct = nand_correct_data;
ecc->read_page = nand_read_page_swecc;
@@ -5312,7 +5229,7 @@ static int nand_set_ecc_soft_ops(struct nand_chip *chip)
ecc->options |= NAND_ECC_SOFT_HAMMING_SM_ORDER;
return 0;
- case NAND_ECC_BCH:
+ case NAND_ECC_ALGO_BCH:
if (!mtd_nand_has_bch()) {
WARN(1, "CONFIG_MTD_NAND_ECC_SW_BCH not enabled\n");
return -EINVAL;
@@ -5350,7 +5267,7 @@ static int nand_set_ecc_soft_ops(struct nand_chip *chip)
return -EINVAL;
}
- mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops);
+ mtd_set_ooblayout(mtd, nand_get_large_page_ooblayout());
}
@@ -5359,8 +5276,8 @@ static int nand_set_ecc_soft_ops(struct nand_chip *chip)
* used, otherwise we don't know how many bytes can really be
* used.
*/
- if (mtd->ooblayout == &nand_ooblayout_lp_ops &&
- ecc->options & NAND_ECC_MAXIMIZE) {
+ if (mtd->ooblayout == nand_get_large_page_ooblayout() &&
+ nanddev->ecc.user_conf.flags & NAND_ECC_MAXIMIZE_STRENGTH) {
int steps, bytes;
/* Always prefer 1k blocks over 512bytes ones */
@@ -5454,10 +5371,12 @@ static int
nand_match_ecc_req(struct nand_chip *chip,
const struct nand_ecc_caps *caps, int oobavail)
{
+ const struct nand_ecc_props *requirements =
+ nanddev_get_ecc_requirements(&chip->base);
struct mtd_info *mtd = nand_to_mtd(chip);
const struct nand_ecc_step_info *stepinfo;
- int req_step = chip->base.eccreq.step_size;
- int req_strength = chip->base.eccreq.strength;
+ int req_step = requirements->step_size;
+ int req_strength = requirements->strength;
int req_corr, step_size, strength, nsteps, ecc_bytes, ecc_bytes_total;
int best_step, best_strength, best_ecc_bytes;
int best_ecc_bytes_total = INT_MAX;
@@ -5598,11 +5517,12 @@ nand_maximize_ecc(struct nand_chip *chip,
* @caps: ECC engine caps info structure
* @oobavail: OOB size that the ECC engine can use
*
- * Choose the ECC configuration according to following logic
+ * Choose the ECC configuration according to following logic.
*
* 1. If both ECC step size and ECC strength are already set (usually by DT)
* then check if it is supported by this controller.
- * 2. If NAND_ECC_MAXIMIZE is set, then select maximum ECC strength.
+ * 2. If the user provided the nand-ecc-maximize property, then select maximum
+ * ECC strength.
* 3. Otherwise, try to match the ECC step size and ECC strength closest
* to the chip's requirement. If available OOB size can't fit the chip
* requirement then fallback to the maximum ECC step size and ECC strength.
@@ -5613,6 +5533,7 @@ int nand_ecc_choose_conf(struct nand_chip *chip,
const struct nand_ecc_caps *caps, int oobavail)
{
struct mtd_info *mtd = nand_to_mtd(chip);
+ struct nand_device *nanddev = mtd_to_nanddev(mtd);
if (WARN_ON(oobavail < 0 || oobavail > mtd->oobsize))
return -EINVAL;
@@ -5620,7 +5541,7 @@ int nand_ecc_choose_conf(struct nand_chip *chip,
if (chip->ecc.size && chip->ecc.strength)
return nand_check_ecc_caps(chip, caps, oobavail);
- if (chip->ecc.options & NAND_ECC_MAXIMIZE)
+ if (nanddev->ecc.user_conf.flags & NAND_ECC_MAXIMIZE_STRENGTH)
return nand_maximize_ecc(chip, caps, oobavail);
if (!nand_match_ecc_req(chip, caps, oobavail))
@@ -5630,41 +5551,6 @@ int nand_ecc_choose_conf(struct nand_chip *chip,
}
EXPORT_SYMBOL_GPL(nand_ecc_choose_conf);
-/*
- * Check if the chip configuration meet the datasheet requirements.
-
- * If our configuration corrects A bits per B bytes and the minimum
- * required correction level is X bits per Y bytes, then we must ensure
- * both of the following are true:
- *
- * (1) A / B >= X / Y
- * (2) A >= X
- *
- * Requirement (1) ensures we can correct for the required bitflip density.
- * Requirement (2) ensures we can correct even when all bitflips are clumped
- * in the same sector.
- */
-static bool nand_ecc_strength_good(struct nand_chip *chip)
-{
- struct mtd_info *mtd = nand_to_mtd(chip);
- struct nand_ecc_ctrl *ecc = &chip->ecc;
- int corr, ds_corr;
-
- if (ecc->size == 0 || chip->base.eccreq.step_size == 0)
- /* Not enough information */
- return true;
-
- /*
- * We get the number of corrected bits per page to compare
- * the correction density.
- */
- corr = (mtd->writesize * ecc->strength) / ecc->size;
- ds_corr = (mtd->writesize * chip->base.eccreq.strength) /
- chip->base.eccreq.step_size;
-
- return corr >= ds_corr && ecc->strength >= chip->base.eccreq.strength;
-}
-
static int rawnand_erase(struct nand_device *nand, const struct nand_pos *pos)
{
struct nand_chip *chip = container_of(nand, struct nand_chip,
@@ -5752,15 +5638,17 @@ static int nand_scan_tail(struct nand_chip *chip)
* If no default placement scheme is given, select an appropriate one.
*/
if (!mtd->ooblayout &&
- !(ecc->mode == NAND_ECC_SOFT && ecc->algo == NAND_ECC_BCH)) {
+ !(ecc->engine_type == NAND_ECC_ENGINE_TYPE_SOFT &&
+ ecc->algo == NAND_ECC_ALGO_BCH)) {
switch (mtd->oobsize) {
case 8:
case 16:
- mtd_set_ooblayout(mtd, &nand_ooblayout_sp_ops);
+ mtd_set_ooblayout(mtd, nand_get_small_page_ooblayout());
break;
case 64:
case 128:
- mtd_set_ooblayout(mtd, &nand_ooblayout_lp_hamming_ops);
+ mtd_set_ooblayout(mtd,
+ nand_get_large_page_hamming_ooblayout());
break;
default:
/*
@@ -5770,9 +5658,9 @@ static int nand_scan_tail(struct nand_chip *chip)
* page with ECC layout when ->oobsize <= 128 for
* compatibility reasons.
*/
- if (ecc->mode == NAND_ECC_NONE) {
+ if (ecc->engine_type == NAND_ECC_ENGINE_TYPE_NONE) {
mtd_set_ooblayout(mtd,
- &nand_ooblayout_lp_ops);
+ nand_get_large_page_ooblayout());
break;
}
@@ -5788,49 +5676,11 @@ static int nand_scan_tail(struct nand_chip *chip)
* selected and we have 256 byte pagesize fallback to software ECC
*/
- switch (ecc->mode) {
- case NAND_ECC_HW:
- /* Use standard hwecc read page function? */
- if (!ecc->read_page)
- ecc->read_page = nand_read_page_hwecc;
- if (!ecc->write_page)
- ecc->write_page = nand_write_page_hwecc;
- if (!ecc->read_page_raw)
- ecc->read_page_raw = nand_read_page_raw;
- if (!ecc->write_page_raw)
- ecc->write_page_raw = nand_write_page_raw;
- if (!ecc->read_oob)
- ecc->read_oob = nand_read_oob_std;
- if (!ecc->write_oob)
- ecc->write_oob = nand_write_oob_std;
- if (!ecc->read_subpage)
- ecc->read_subpage = nand_read_subpage;
- if (!ecc->write_subpage && ecc->hwctl && ecc->calculate)
- ecc->write_subpage = nand_write_subpage_hwecc;
- fallthrough;
- case NAND_ECC_HW_SYNDROME:
- if ((!ecc->calculate || !ecc->correct || !ecc->hwctl) &&
- (!ecc->read_page ||
- ecc->read_page == nand_read_page_hwecc ||
- !ecc->write_page ||
- ecc->write_page == nand_write_page_hwecc)) {
- WARN(1, "No ECC functions supplied; hardware ECC not possible\n");
- ret = -EINVAL;
+ switch (ecc->engine_type) {
+ case NAND_ECC_ENGINE_TYPE_ON_HOST:
+ ret = nand_set_ecc_on_host_ops(chip);
+ if (ret)
goto err_nand_manuf_cleanup;
- }
- /* Use standard syndrome read/write page function? */
- if (!ecc->read_page)
- ecc->read_page = nand_read_page_syndrome;
- if (!ecc->write_page)
- ecc->write_page = nand_write_page_syndrome;
- if (!ecc->read_page_raw)
- ecc->read_page_raw = nand_read_page_raw_syndrome;
- if (!ecc->write_page_raw)
- ecc->write_page_raw = nand_write_page_raw_syndrome;
- if (!ecc->read_oob)
- ecc->read_oob = nand_read_oob_syndrome;
- if (!ecc->write_oob)
- ecc->write_oob = nand_write_oob_syndrome;
if (mtd->writesize >= ecc->size) {
if (!ecc->strength) {
@@ -5842,18 +5692,17 @@ static int nand_scan_tail(struct nand_chip *chip)
}
pr_warn("%d byte HW ECC not possible on %d byte page size, fallback to SW ECC\n",
ecc->size, mtd->writesize);
- ecc->mode = NAND_ECC_SOFT;
- ecc->algo = NAND_ECC_HAMMING;
+ ecc->engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+ ecc->algo = NAND_ECC_ALGO_HAMMING;
fallthrough;
- case NAND_ECC_SOFT:
+
+ case NAND_ECC_ENGINE_TYPE_SOFT:
ret = nand_set_ecc_soft_ops(chip);
- if (ret) {
- ret = -EINVAL;
+ if (ret)
goto err_nand_manuf_cleanup;
- }
break;
- case NAND_ECC_ON_DIE:
+ case NAND_ECC_ENGINE_TYPE_ON_DIE:
if (!ecc->read_page || !ecc->write_page) {
WARN(1, "No ECC functions supplied; on-die ECC not possible\n");
ret = -EINVAL;
@@ -5865,8 +5714,8 @@ static int nand_scan_tail(struct nand_chip *chip)
ecc->write_oob = nand_write_oob_std;
break;
- case NAND_ECC_NONE:
- pr_warn("NAND_ECC_NONE selected by board driver. This is not recommended!\n");
+ case NAND_ECC_ENGINE_TYPE_NONE:
+ pr_warn("NAND_ECC_ENGINE_TYPE_NONE selected by board driver. This is not recommended!\n");
ecc->read_page = nand_read_page_raw;
ecc->write_page = nand_write_page_raw;
ecc->read_oob = nand_read_oob_std;
@@ -5879,7 +5728,7 @@ static int nand_scan_tail(struct nand_chip *chip)
break;
default:
- WARN(1, "Invalid NAND_ECC_MODE %d\n", ecc->mode);
+ WARN(1, "Invalid NAND_ECC_MODE %d\n", ecc->engine_type);
ret = -EINVAL;
goto err_nand_manuf_cleanup;
}
@@ -5913,7 +5762,10 @@ static int nand_scan_tail(struct nand_chip *chip)
ret = -EINVAL;
goto err_nand_manuf_cleanup;
}
+
ecc->total = ecc->steps * ecc->bytes;
+ chip->base.ecc.ctx.total = ecc->total;
+
if (ecc->total > mtd->oobsize) {
WARN(1, "Total number of ECC bytes exceeded oobsize\n");
ret = -EINVAL;
@@ -5931,11 +5783,11 @@ static int nand_scan_tail(struct nand_chip *chip)
mtd->oobavail = ret;
/* ECC sanity check: warn if it's too weak */
- if (!nand_ecc_strength_good(chip))
+ if (!nand_ecc_is_strong_enough(&chip->base))
pr_warn("WARNING: %s: the ECC used on your system (%db/%dB) is too weak compared to the one required by the NAND chip (%db/%dB)\n",
mtd->name, chip->ecc.strength, chip->ecc.size,
- chip->base.eccreq.strength,
- chip->base.eccreq.step_size);
+ nanddev_get_ecc_requirements(&chip->base)->strength,
+ nanddev_get_ecc_requirements(&chip->base)->step_size);
/* Allow subpage writes up to ecc.steps. Not possible for MLC flash */
if (!(chip->options & NAND_NO_SUBPAGE_WRITE) && nand_is_slc(chip)) {
@@ -5956,8 +5808,8 @@ static int nand_scan_tail(struct nand_chip *chip)
chip->pagecache.page = -1;
/* Large page NAND with SOFT_ECC should support subpage reads */
- switch (ecc->mode) {
- case NAND_ECC_SOFT:
+ switch (ecc->engine_type) {
+ case NAND_ECC_ENGINE_TYPE_SOFT:
if (chip->page_shift > 9)
chip->options |= NAND_SUBPAGE_READ;
break;
@@ -6101,8 +5953,8 @@ EXPORT_SYMBOL(nand_scan_with_ids);
*/
void nand_cleanup(struct nand_chip *chip)
{
- if (chip->ecc.mode == NAND_ECC_SOFT &&
- chip->ecc.algo == NAND_ECC_BCH)
+ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT &&
+ chip->ecc.algo == NAND_ECC_ALGO_BCH)
nand_bch_free((struct nand_bch_control *)chip->ecc.priv);
nanddev_cleanup(&chip->base);
diff --git a/drivers/mtd/nand/raw/nand_bch.c b/drivers/mtd/nand/raw/nand_bch.c
index d5af8c5fd02f..9d19ac14c196 100644
--- a/drivers/mtd/nand/raw/nand_bch.c
+++ b/drivers/mtd/nand/raw/nand_bch.c
@@ -165,6 +165,7 @@ struct nand_bch_control *nand_bch_init(struct mtd_info *mtd)
*/
nand->ecc.steps = eccsteps;
nand->ecc.total = eccsteps * eccbytes;
+ nand->base.ecc.ctx.total = nand->ecc.total;
if (mtd_ooblayout_count_eccbytes(mtd) != (eccsteps*eccbytes)) {
pr_warn("invalid ecc layout\n");
goto fail;
diff --git a/drivers/mtd/nand/raw/nand_esmt.c b/drivers/mtd/nand/raw/nand_esmt.c
index 3338c68aaaf1..4412c407aef3 100644
--- a/drivers/mtd/nand/raw/nand_esmt.c
+++ b/drivers/mtd/nand/raw/nand_esmt.c
@@ -10,27 +10,32 @@
static void esmt_nand_decode_id(struct nand_chip *chip)
{
+ struct nand_device *base = &chip->base;
+ struct nand_ecc_props requirements = {};
+
nand_decode_ext_id(chip);
/* Extract ECC requirements from 5th id byte. */
if (chip->id.len >= 5 && nand_is_slc(chip)) {
- chip->base.eccreq.step_size = 512;
+ requirements.step_size = 512;
switch (chip->id.data[4] & 0x3) {
case 0x0:
- chip->base.eccreq.strength = 4;
+ requirements.strength = 4;
break;
case 0x1:
- chip->base.eccreq.strength = 2;
+ requirements.strength = 2;
break;
case 0x2:
- chip->base.eccreq.strength = 1;
+ requirements.strength = 1;
break;
default:
WARN(1, "Could not get ECC info");
- chip->base.eccreq.step_size = 0;
+ requirements.step_size = 0;
break;
}
}
+
+ nanddev_set_ecc_requirements(base, &requirements);
}
static int esmt_nand_init(struct nand_chip *chip)
diff --git a/drivers/mtd/nand/raw/nand_hynix.c b/drivers/mtd/nand/raw/nand_hynix.c
index 6d08eb834456..a9f50c9af109 100644
--- a/drivers/mtd/nand/raw/nand_hynix.c
+++ b/drivers/mtd/nand/raw/nand_hynix.c
@@ -495,34 +495,36 @@ static void hynix_nand_extract_oobsize(struct nand_chip *chip,
static void hynix_nand_extract_ecc_requirements(struct nand_chip *chip,
bool valid_jedecid)
{
+ struct nand_device *base = &chip->base;
+ struct nand_ecc_props requirements = {};
u8 ecc_level = (chip->id.data[4] >> 4) & 0x7;
if (valid_jedecid) {
/* Reference: H27UCG8T2E datasheet */
- chip->base.eccreq.step_size = 1024;
+ requirements.step_size = 1024;
switch (ecc_level) {
case 0:
- chip->base.eccreq.step_size = 0;
- chip->base.eccreq.strength = 0;
+ requirements.step_size = 0;
+ requirements.strength = 0;
break;
case 1:
- chip->base.eccreq.strength = 4;
+ requirements.strength = 4;
break;
case 2:
- chip->base.eccreq.strength = 24;
+ requirements.strength = 24;
break;
case 3:
- chip->base.eccreq.strength = 32;
+ requirements.strength = 32;
break;
case 4:
- chip->base.eccreq.strength = 40;
+ requirements.strength = 40;
break;
case 5:
- chip->base.eccreq.strength = 50;
+ requirements.strength = 50;
break;
case 6:
- chip->base.eccreq.strength = 60;
+ requirements.strength = 60;
break;
default:
/*
@@ -543,14 +545,14 @@ static void hynix_nand_extract_ecc_requirements(struct nand_chip *chip,
if (nand_tech < 3) {
/* > 26nm, reference: H27UBG8T2A datasheet */
if (ecc_level < 5) {
- chip->base.eccreq.step_size = 512;
- chip->base.eccreq.strength = 1 << ecc_level;
+ requirements.step_size = 512;
+ requirements.strength = 1 << ecc_level;
} else if (ecc_level < 7) {
if (ecc_level == 5)
- chip->base.eccreq.step_size = 2048;
+ requirements.step_size = 2048;
else
- chip->base.eccreq.step_size = 1024;
- chip->base.eccreq.strength = 24;
+ requirements.step_size = 1024;
+ requirements.strength = 24;
} else {
/*
* We should never reach this case, but if that
@@ -563,18 +565,20 @@ static void hynix_nand_extract_ecc_requirements(struct nand_chip *chip,
} else {
/* <= 26nm, reference: H27UBG8T2B datasheet */
if (!ecc_level) {
- chip->base.eccreq.step_size = 0;
- chip->base.eccreq.strength = 0;
+ requirements.step_size = 0;
+ requirements.strength = 0;
} else if (ecc_level < 5) {
- chip->base.eccreq.step_size = 512;
- chip->base.eccreq.strength = 1 << (ecc_level - 1);
+ requirements.step_size = 512;
+ requirements.strength = 1 << (ecc_level - 1);
} else {
- chip->base.eccreq.step_size = 1024;
- chip->base.eccreq.strength = 24 +
+ requirements.step_size = 1024;
+ requirements.strength = 24 +
(8 * (ecc_level - 5));
}
}
}
+
+ nanddev_set_ecc_requirements(base, &requirements);
}
static void hynix_nand_extract_scrambling_requirements(struct nand_chip *chip,
diff --git a/drivers/mtd/nand/raw/nand_jedec.c b/drivers/mtd/nand/raw/nand_jedec.c
index b15c42f48755..85b6d9372d80 100644
--- a/drivers/mtd/nand/raw/nand_jedec.c
+++ b/drivers/mtd/nand/raw/nand_jedec.c
@@ -23,6 +23,7 @@
*/
int nand_jedec_detect(struct nand_chip *chip)
{
+ struct nand_device *base = &chip->base;
struct mtd_info *mtd = nand_to_mtd(chip);
struct nand_memory_organization *memorg;
struct nand_jedec_params *p;
@@ -120,8 +121,12 @@ int nand_jedec_detect(struct nand_chip *chip)
ecc = &p->ecc_info[0];
if (ecc->codeword_size >= 9) {
- chip->base.eccreq.strength = ecc->ecc_bits;
- chip->base.eccreq.step_size = 1 << ecc->codeword_size;
+ struct nand_ecc_props requirements = {
+ .strength = ecc->ecc_bits,
+ .step_size = 1 << ecc->codeword_size,
+ };
+
+ nanddev_set_ecc_requirements(base, &requirements);
} else {
pr_warn("Invalid codeword size\n");
}
diff --git a/drivers/mtd/nand/raw/nand_micron.c b/drivers/mtd/nand/raw/nand_micron.c
index 4385092a9325..c0192881906b 100644
--- a/drivers/mtd/nand/raw/nand_micron.c
+++ b/drivers/mtd/nand/raw/nand_micron.c
@@ -413,6 +413,8 @@ enum {
*/
static int micron_supports_on_die_ecc(struct nand_chip *chip)
{
+ const struct nand_ecc_props *requirements =
+ nanddev_get_ecc_requirements(&chip->base);
u8 id[5];
int ret;
@@ -425,7 +427,7 @@ static int micron_supports_on_die_ecc(struct nand_chip *chip)
/*
* We only support on-die ECC of 4/512 or 8/512
*/
- if (chip->base.eccreq.strength != 4 && chip->base.eccreq.strength != 8)
+ if (requirements->strength != 4 && requirements->strength != 8)
return MICRON_ON_DIE_UNSUPPORTED;
/* 0x2 means on-die ECC is available. */
@@ -466,7 +468,7 @@ static int micron_supports_on_die_ecc(struct nand_chip *chip)
/*
* We only support on-die ECC of 4/512 or 8/512
*/
- if (chip->base.eccreq.strength != 4 && chip->base.eccreq.strength != 8)
+ if (requirements->strength != 4 && requirements->strength != 8)
return MICRON_ON_DIE_UNSUPPORTED;
return MICRON_ON_DIE_SUPPORTED;
@@ -474,6 +476,9 @@ static int micron_supports_on_die_ecc(struct nand_chip *chip)
static int micron_nand_init(struct nand_chip *chip)
{
+ struct nand_device *base = &chip->base;
+ const struct nand_ecc_props *requirements =
+ nanddev_get_ecc_requirements(base);
struct mtd_info *mtd = nand_to_mtd(chip);
struct micron_nand *micron;
int ondie;
@@ -497,13 +502,13 @@ static int micron_nand_init(struct nand_chip *chip)
ondie = micron_supports_on_die_ecc(chip);
if (ondie == MICRON_ON_DIE_MANDATORY &&
- chip->ecc.mode != NAND_ECC_ON_DIE) {
+ chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_DIE) {
pr_err("On-die ECC forcefully enabled, not supported\n");
ret = -EINVAL;
goto err_free_manuf_data;
}
- if (chip->ecc.mode == NAND_ECC_ON_DIE) {
+ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_DIE) {
if (ondie == MICRON_ON_DIE_UNSUPPORTED) {
pr_err("On-die ECC selected but not supported\n");
ret = -EINVAL;
@@ -523,7 +528,7 @@ static int micron_nand_init(struct nand_chip *chip)
* That's not needed for 8-bit ECC, because the status expose
* a better approximation of the number of bitflips in a page.
*/
- if (chip->base.eccreq.strength == 4) {
+ if (requirements->strength == 4) {
micron->ecc.rawbuf = kmalloc(mtd->writesize +
mtd->oobsize,
GFP_KERNEL);
@@ -533,17 +538,17 @@ static int micron_nand_init(struct nand_chip *chip)
}
}
- if (chip->base.eccreq.strength == 4)
+ if (requirements->strength == 4)
mtd_set_ooblayout(mtd,
&micron_nand_on_die_4_ooblayout_ops);
else
mtd_set_ooblayout(mtd,
&micron_nand_on_die_8_ooblayout_ops);
- chip->ecc.bytes = chip->base.eccreq.strength * 2;
+ chip->ecc.bytes = requirements->strength * 2;
chip->ecc.size = 512;
- chip->ecc.strength = chip->base.eccreq.strength;
- chip->ecc.algo = NAND_ECC_BCH;
+ chip->ecc.strength = requirements->strength;
+ chip->ecc.algo = NAND_ECC_ALGO_BCH;
chip->ecc.read_page = micron_nand_read_page_on_die_ecc;
chip->ecc.write_page = micron_nand_write_page_on_die_ecc;
diff --git a/drivers/mtd/nand/raw/nand_onfi.c b/drivers/mtd/nand/raw/nand_onfi.c
index be3456627288..45649e03797d 100644
--- a/drivers/mtd/nand/raw/nand_onfi.c
+++ b/drivers/mtd/nand/raw/nand_onfi.c
@@ -34,6 +34,8 @@ u16 onfi_crc16(u16 crc, u8 const *p, size_t len)
static int nand_flash_detect_ext_param_page(struct nand_chip *chip,
struct nand_onfi_params *p)
{
+ struct nand_device *base = &chip->base;
+ struct nand_ecc_props requirements;
struct onfi_ext_param_page *ep;
struct onfi_ext_section *s;
struct onfi_ext_ecc_info *ecc;
@@ -94,8 +96,10 @@ static int nand_flash_detect_ext_param_page(struct nand_chip *chip,
goto ext_out;
}
- chip->base.eccreq.strength = ecc->ecc_bits;
- chip->base.eccreq.step_size = 1 << ecc->codeword_size;
+ requirements.strength = ecc->ecc_bits;
+ requirements.step_size = 1 << ecc->codeword_size;
+ nanddev_set_ecc_requirements(base, &requirements);
+
ret = 0;
ext_out:
@@ -139,6 +143,7 @@ static void nand_bit_wise_majority(const void **srcbufs,
*/
int nand_onfi_detect(struct nand_chip *chip)
{
+ struct nand_device *base = &chip->base;
struct mtd_info *mtd = nand_to_mtd(chip);
struct nand_memory_organization *memorg;
struct nand_onfi_params *p = NULL, *pbuf;
@@ -265,8 +270,12 @@ int nand_onfi_detect(struct nand_chip *chip)
chip->options |= NAND_BUSWIDTH_16;
if (p->ecc_bits != 0xff) {
- chip->base.eccreq.strength = p->ecc_bits;
- chip->base.eccreq.step_size = 512;
+ struct nand_ecc_props requirements = {
+ .strength = p->ecc_bits,
+ .step_size = 512,
+ };
+
+ nanddev_set_ecc_requirements(base, &requirements);
} else if (onfi_version >= 21 &&
(le16_to_cpu(p->features) & ONFI_FEATURE_EXT_PARAM_PAGE)) {
diff --git a/drivers/mtd/nand/raw/nand_samsung.c b/drivers/mtd/nand/raw/nand_samsung.c
index 3a4a19e808f6..0be6b7563805 100644
--- a/drivers/mtd/nand/raw/nand_samsung.c
+++ b/drivers/mtd/nand/raw/nand_samsung.c
@@ -10,6 +10,8 @@
static void samsung_nand_decode_id(struct nand_chip *chip)
{
+ struct nand_device *base = &chip->base;
+ struct nand_ecc_props requirements = {};
struct mtd_info *mtd = nand_to_mtd(chip);
struct nand_memory_organization *memorg;
@@ -71,23 +73,23 @@ static void samsung_nand_decode_id(struct nand_chip *chip)
/* Extract ECC requirements from 5th id byte*/
extid = (chip->id.data[4] >> 4) & 0x07;
if (extid < 5) {
- chip->base.eccreq.step_size = 512;
- chip->base.eccreq.strength = 1 << extid;
+ requirements.step_size = 512;
+ requirements.strength = 1 << extid;
} else {
- chip->base.eccreq.step_size = 1024;
+ requirements.step_size = 1024;
switch (extid) {
case 5:
- chip->base.eccreq.strength = 24;
+ requirements.strength = 24;
break;
case 6:
- chip->base.eccreq.strength = 40;
+ requirements.strength = 40;
break;
case 7:
- chip->base.eccreq.strength = 60;
+ requirements.strength = 60;
break;
default:
WARN(1, "Could not decode ECC info");
- chip->base.eccreq.step_size = 0;
+ requirements.step_size = 0;
}
}
} else {
@@ -97,8 +99,8 @@ static void samsung_nand_decode_id(struct nand_chip *chip)
switch (chip->id.data[1]) {
/* K9F4G08U0D-S[I|C]B0(T00) */
case 0xDC:
- chip->base.eccreq.step_size = 512;
- chip->base.eccreq.strength = 1;
+ requirements.step_size = 512;
+ requirements.strength = 1;
break;
/* K9F1G08U0E 21nm chips do not support subpage write */
@@ -112,6 +114,8 @@ static void samsung_nand_decode_id(struct nand_chip *chip)
}
}
}
+
+ nanddev_set_ecc_requirements(base, &requirements);
}
static int samsung_nand_init(struct nand_chip *chip)
diff --git a/drivers/mtd/nand/raw/nand_toshiba.c b/drivers/mtd/nand/raw/nand_toshiba.c
index f746c19f3b2c..cf4f37959421 100644
--- a/drivers/mtd/nand/raw/nand_toshiba.c
+++ b/drivers/mtd/nand/raw/nand_toshiba.c
@@ -140,11 +140,13 @@ static void toshiba_nand_benand_init(struct nand_chip *chip)
chip->options |= NAND_SUBPAGE_READ;
- mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops);
+ mtd_set_ooblayout(mtd, nand_get_large_page_ooblayout());
}
static void toshiba_nand_decode_id(struct nand_chip *chip)
{
+ struct nand_device *base = &chip->base;
+ struct nand_ecc_props requirements = {};
struct mtd_info *mtd = nand_to_mtd(chip);
struct nand_memory_organization *memorg;
@@ -175,23 +177,25 @@ static void toshiba_nand_decode_id(struct nand_chip *chip)
* - 24nm: 8 bit ECC for each 512Byte is required.
*/
if (chip->id.len >= 6 && nand_is_slc(chip)) {
- chip->base.eccreq.step_size = 512;
+ requirements.step_size = 512;
switch (chip->id.data[5] & 0x7) {
case 0x4:
- chip->base.eccreq.strength = 1;
+ requirements.strength = 1;
break;
case 0x5:
- chip->base.eccreq.strength = 4;
+ requirements.strength = 4;
break;
case 0x6:
- chip->base.eccreq.strength = 8;
+ requirements.strength = 8;
break;
default:
WARN(1, "Could not get ECC info");
- chip->base.eccreq.step_size = 0;
+ requirements.step_size = 0;
break;
}
}
+
+ nanddev_set_ecc_requirements(base, &requirements);
}
static int
@@ -273,7 +277,8 @@ static int toshiba_nand_init(struct nand_chip *chip)
chip->options |= NAND_BBM_FIRSTPAGE | NAND_BBM_SECONDPAGE;
/* Check that chip is BENAND and ECC mode is on-die */
- if (nand_is_slc(chip) && chip->ecc.mode == NAND_ECC_ON_DIE &&
+ if (nand_is_slc(chip) &&
+ chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_DIE &&
chip->id.data[4] & TOSHIBA_NAND_ID4_IS_BENAND)
toshiba_nand_benand_init(chip);
diff --git a/drivers/mtd/nand/raw/nandsim.c b/drivers/mtd/nand/raw/nandsim.c
index f5a53aac3c5f..a8048cb8d220 100644
--- a/drivers/mtd/nand/raw/nandsim.c
+++ b/drivers/mtd/nand/raw/nandsim.c
@@ -2234,8 +2234,8 @@ static int ns_attach_chip(struct nand_chip *chip)
return -EINVAL;
}
- chip->ecc.mode = NAND_ECC_SOFT;
- chip->ecc.algo = NAND_ECC_BCH;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+ chip->ecc.algo = NAND_ECC_ALGO_BCH;
chip->ecc.size = 512;
chip->ecc.strength = bch;
chip->ecc.bytes = eccbytes;
@@ -2274,8 +2274,8 @@ static int __init ns_init_module(void)
nsmtd = nand_to_mtd(chip);
nand_set_controller_data(chip, (void *)ns);
- chip->ecc.mode = NAND_ECC_SOFT;
- chip->ecc.algo = NAND_ECC_HAMMING;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+ chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
/* The NAND_SKIP_BBTSCAN option is necessary for 'overridesize' */
/* and 'badblocks' parameters to work */
chip->options |= NAND_SKIP_BBTSCAN;
diff --git a/drivers/mtd/nand/raw/ndfc.c b/drivers/mtd/nand/raw/ndfc.c
index ed38338c1383..0fb4ba93c41e 100644
--- a/drivers/mtd/nand/raw/ndfc.c
+++ b/drivers/mtd/nand/raw/ndfc.c
@@ -149,7 +149,7 @@ static int ndfc_chip_init(struct ndfc_controller *ndfc,
chip->ecc.correct = nand_correct_data;
chip->ecc.hwctl = ndfc_enable_hwecc;
chip->ecc.calculate = ndfc_calculate_ecc;
- chip->ecc.mode = NAND_ECC_HW;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
chip->ecc.size = 256;
chip->ecc.bytes = 3;
chip->ecc.strength = 1;
diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c
index eb7fcfd9276b..512f60780a50 100644
--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -884,8 +884,8 @@ static int omap_correct_data(struct nand_chip *chip, u_char *dat,
int stat = 0;
/* Ex NAND_ECC_HW12_2048 */
- if ((info->nand.ecc.mode == NAND_ECC_HW) &&
- (info->nand.ecc.size == 2048))
+ if (info->nand.ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_HOST &&
+ info->nand.ecc.size == 2048)
blockCnt = 4;
else
blockCnt = 1;
@@ -2006,12 +2006,12 @@ static int omap_nand_attach_chip(struct nand_chip *chip)
return -EINVAL;
/*
- * Bail out earlier to let NAND_ECC_SOFT code create its own
+ * Bail out earlier to let NAND_ECC_ENGINE_TYPE_SOFT code create its own
* ooblayout instead of using ours.
*/
if (info->ecc_opt == OMAP_ECC_HAM1_CODE_SW) {
- chip->ecc.mode = NAND_ECC_SOFT;
- chip->ecc.algo = NAND_ECC_HAMMING;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+ chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
return 0;
}
@@ -2019,7 +2019,7 @@ static int omap_nand_attach_chip(struct nand_chip *chip)
switch (info->ecc_opt) {
case OMAP_ECC_HAM1_CODE_HW:
dev_info(dev, "nand: using OMAP_ECC_HAM1_CODE_HW\n");
- chip->ecc.mode = NAND_ECC_HW;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
chip->ecc.bytes = 3;
chip->ecc.size = 512;
chip->ecc.strength = 1;
@@ -2036,7 +2036,7 @@ static int omap_nand_attach_chip(struct nand_chip *chip)
case OMAP_ECC_BCH4_CODE_HW_DETECTION_SW:
pr_info("nand: using OMAP_ECC_BCH4_CODE_HW_DETECTION_SW\n");
- chip->ecc.mode = NAND_ECC_HW;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
chip->ecc.size = 512;
chip->ecc.bytes = 7;
chip->ecc.strength = 4;
@@ -2056,7 +2056,7 @@ static int omap_nand_attach_chip(struct nand_chip *chip)
case OMAP_ECC_BCH4_CODE_HW:
pr_info("nand: using OMAP_ECC_BCH4_CODE_HW ECC scheme\n");
- chip->ecc.mode = NAND_ECC_HW;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
chip->ecc.size = 512;
/* 14th bit is kept reserved for ROM-code compatibility */
chip->ecc.bytes = 7 + 1;
@@ -2078,7 +2078,7 @@ static int omap_nand_attach_chip(struct nand_chip *chip)
case OMAP_ECC_BCH8_CODE_HW_DETECTION_SW:
pr_info("nand: using OMAP_ECC_BCH8_CODE_HW_DETECTION_SW\n");
- chip->ecc.mode = NAND_ECC_HW;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
chip->ecc.size = 512;
chip->ecc.bytes = 13;
chip->ecc.strength = 8;
@@ -2098,7 +2098,7 @@ static int omap_nand_attach_chip(struct nand_chip *chip)
case OMAP_ECC_BCH8_CODE_HW:
pr_info("nand: using OMAP_ECC_BCH8_CODE_HW ECC scheme\n");
- chip->ecc.mode = NAND_ECC_HW;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
chip->ecc.size = 512;
/* 14th bit is kept reserved for ROM-code compatibility */
chip->ecc.bytes = 13 + 1;
@@ -2121,7 +2121,7 @@ static int omap_nand_attach_chip(struct nand_chip *chip)
case OMAP_ECC_BCH16_CODE_HW:
pr_info("Using OMAP_ECC_BCH16_CODE_HW ECC scheme\n");
- chip->ecc.mode = NAND_ECC_HW;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
chip->ecc.size = 512;
chip->ecc.bytes = 26;
chip->ecc.strength = 16;
diff --git a/drivers/mtd/nand/raw/orion_nand.c b/drivers/mtd/nand/raw/orion_nand.c
index 880b54ca1b41..df9c0f8e4b4e 100644
--- a/drivers/mtd/nand/raw/orion_nand.c
+++ b/drivers/mtd/nand/raw/orion_nand.c
@@ -139,8 +139,8 @@ static int __init orion_nand_probe(struct platform_device *pdev)
nc->legacy.IO_ADDR_R = nc->legacy.IO_ADDR_W = io_base;
nc->legacy.cmd_ctrl = orion_nand_cmd_ctrl;
nc->legacy.read_buf = orion_nand_read_buf;
- nc->ecc.mode = NAND_ECC_SOFT;
- nc->ecc.algo = NAND_ECC_HAMMING;
+ nc->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+ nc->ecc.algo = NAND_ECC_ALGO_HAMMING;
if (board->chip_delay)
nc->legacy.chip_delay = board->chip_delay;
diff --git a/drivers/mtd/nand/raw/pasemi_nand.c b/drivers/mtd/nand/raw/pasemi_nand.c
index d8eca8c3fdcd..2b8f155cc0c5 100644
--- a/drivers/mtd/nand/raw/pasemi_nand.c
+++ b/drivers/mtd/nand/raw/pasemi_nand.c
@@ -68,7 +68,7 @@ static void pasemi_hwcontrol(struct nand_chip *chip, int cmd,
inl(lpcctl);
}
-int pasemi_device_ready(struct nand_chip *chip)
+static int pasemi_device_ready(struct nand_chip *chip)
{
return !!(inl(lpcctl) & LBICTRL_LPCCTL_NR);
}
@@ -132,8 +132,8 @@ static int pasemi_nand_probe(struct platform_device *ofdev)
chip->legacy.read_buf = pasemi_read_buf;
chip->legacy.write_buf = pasemi_write_buf;
chip->legacy.chip_delay = 0;
- chip->ecc.mode = NAND_ECC_SOFT;
- chip->ecc.algo = NAND_ECC_HAMMING;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+ chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
/* Enable the following for a flash based bad block table */
chip->bbt_options = NAND_BBT_USE_FLASH;
diff --git a/drivers/mtd/nand/raw/plat_nand.c b/drivers/mtd/nand/raw/plat_nand.c
index 556182f26057..b98c0d5c413f 100644
--- a/drivers/mtd/nand/raw/plat_nand.c
+++ b/drivers/mtd/nand/raw/plat_nand.c
@@ -66,8 +66,8 @@ static int plat_nand_probe(struct platform_device *pdev)
data->chip.options |= pdata->chip.options;
data->chip.bbt_options |= pdata->chip.bbt_options;
- data->chip.ecc.mode = NAND_ECC_SOFT;
- data->chip.ecc.algo = NAND_ECC_HAMMING;
+ data->chip.ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+ data->chip.ecc.algo = NAND_ECC_ALGO_HAMMING;
platform_set_drvdata(pdev, data);
diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c
index bd7a7251429b..777fb0de0680 100644
--- a/drivers/mtd/nand/raw/qcom_nandc.c
+++ b/drivers/mtd/nand/raw/qcom_nandc.c
@@ -2550,7 +2550,7 @@ static int qcom_nand_attach_chip(struct nand_chip *chip)
ecc->write_page_raw = qcom_nandc_write_page_raw;
ecc->write_oob = qcom_nandc_write_oob;
- ecc->mode = NAND_ECC_HW;
+ ecc->engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
mtd_set_ooblayout(mtd, &qcom_nand_ooblayout_ops);
@@ -2702,10 +2702,8 @@ static int qcom_nandc_alloc(struct qcom_nand_controller *nandc)
if (IS_ERR(nandc->tx_chan)) {
ret = PTR_ERR(nandc->tx_chan);
nandc->tx_chan = NULL;
- if (ret != -EPROBE_DEFER)
- dev_err(nandc->dev,
- "tx DMA channel request failed: %d\n",
- ret);
+ dev_err_probe(nandc->dev, ret,
+ "tx DMA channel request failed\n");
goto unalloc;
}
@@ -2713,10 +2711,8 @@ static int qcom_nandc_alloc(struct qcom_nand_controller *nandc)
if (IS_ERR(nandc->rx_chan)) {
ret = PTR_ERR(nandc->rx_chan);
nandc->rx_chan = NULL;
- if (ret != -EPROBE_DEFER)
- dev_err(nandc->dev,
- "rx DMA channel request failed: %d\n",
- ret);
+ dev_err_probe(nandc->dev, ret,
+ "rx DMA channel request failed\n");
goto unalloc;
}
@@ -2724,10 +2720,8 @@ static int qcom_nandc_alloc(struct qcom_nand_controller *nandc)
if (IS_ERR(nandc->cmd_chan)) {
ret = PTR_ERR(nandc->cmd_chan);
nandc->cmd_chan = NULL;
- if (ret != -EPROBE_DEFER)
- dev_err(nandc->dev,
- "cmd DMA channel request failed: %d\n",
- ret);
+ dev_err_probe(nandc->dev, ret,
+ "cmd DMA channel request failed\n");
goto unalloc;
}
@@ -2750,10 +2744,8 @@ static int qcom_nandc_alloc(struct qcom_nand_controller *nandc)
if (IS_ERR(nandc->chan)) {
ret = PTR_ERR(nandc->chan);
nandc->chan = NULL;
- if (ret != -EPROBE_DEFER)
- dev_err(nandc->dev,
- "rxtx DMA channel request failed: %d\n",
- ret);
+ dev_err_probe(nandc->dev, ret,
+ "rxtx DMA channel request failed\n");
return ret;
}
}
diff --git a/drivers/mtd/nand/raw/r852.c b/drivers/mtd/nand/raw/r852.c
index f865e3a47b01..6b7addd2c420 100644
--- a/drivers/mtd/nand/raw/r852.c
+++ b/drivers/mtd/nand/raw/r852.c
@@ -859,7 +859,8 @@ static int r852_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
chip->legacy.write_buf = r852_write_buf;
/* ecc */
- chip->ecc.mode = NAND_ECC_HW_SYNDROME;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
+ chip->ecc.placement = NAND_ECC_PLACEMENT_INTERLEAVED;
chip->ecc.size = R852_DMA_LEN;
chip->ecc.bytes = SM_OOB_SIZE;
chip->ecc.strength = 2;
diff --git a/drivers/mtd/nand/raw/s3c2410.c b/drivers/mtd/nand/raw/s3c2410.c
index 105522205979..fbd0fa48e063 100644
--- a/drivers/mtd/nand/raw/s3c2410.c
+++ b/drivers/mtd/nand/raw/s3c2410.c
@@ -904,7 +904,7 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info,
nmtd->info = info;
nmtd->set = set;
- chip->ecc.mode = info->platform->ecc_mode;
+ chip->ecc.engine_type = info->platform->engine_type;
/*
* If you use u-boot BBT creation code, specifying this flag will
@@ -929,24 +929,24 @@ static int s3c2410_nand_attach_chip(struct nand_chip *chip)
struct mtd_info *mtd = nand_to_mtd(chip);
struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd);
- switch (chip->ecc.mode) {
+ switch (chip->ecc.engine_type) {
- case NAND_ECC_NONE:
+ case NAND_ECC_ENGINE_TYPE_NONE:
dev_info(info->device, "ECC disabled\n");
break;
- case NAND_ECC_SOFT:
+ case NAND_ECC_ENGINE_TYPE_SOFT:
/*
- * This driver expects Hamming based ECC when ecc_mode is set
- * to NAND_ECC_SOFT. Force ecc.algo to NAND_ECC_HAMMING to
- * avoid adding an extra ecc_algo field to
- * s3c2410_platform_nand.
+ * This driver expects Hamming based ECC when engine_type is set
+ * to NAND_ECC_ENGINE_TYPE_SOFT. Force ecc.algo to
+ * NAND_ECC_ALGO_HAMMING to avoid adding an extra ecc_algo field
+ * to s3c2410_platform_nand.
*/
- chip->ecc.algo = NAND_ECC_HAMMING;
+ chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
dev_info(info->device, "soft ECC\n");
break;
- case NAND_ECC_HW:
+ case NAND_ECC_ENGINE_TYPE_ON_HOST:
chip->ecc.calculate = s3c2410_nand_calculate_ecc;
chip->ecc.correct = s3c2410_nand_correct_data;
chip->ecc.strength = 1;
diff --git a/drivers/mtd/nand/raw/sh_flctl.c b/drivers/mtd/nand/raw/sh_flctl.c
index a661b8bb2dd5..13df4bdf792a 100644
--- a/drivers/mtd/nand/raw/sh_flctl.c
+++ b/drivers/mtd/nand/raw/sh_flctl.c
@@ -1039,13 +1039,13 @@ static int flctl_chip_attach_chip(struct nand_chip *chip)
chip->ecc.strength = 4;
chip->ecc.read_page = flctl_read_page_hwecc;
chip->ecc.write_page = flctl_write_page_hwecc;
- chip->ecc.mode = NAND_ECC_HW;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
/* 4 symbols ECC enabled */
flctl->flcmncr_base |= _4ECCEN;
} else {
- chip->ecc.mode = NAND_ECC_SOFT;
- chip->ecc.algo = NAND_ECC_HAMMING;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+ chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
}
return 0;
diff --git a/drivers/mtd/nand/raw/sharpsl.c b/drivers/mtd/nand/raw/sharpsl.c
index 51286f7acf54..1327bfb3d5d3 100644
--- a/drivers/mtd/nand/raw/sharpsl.c
+++ b/drivers/mtd/nand/raw/sharpsl.c
@@ -157,7 +157,7 @@ static int sharpsl_nand_probe(struct platform_device *pdev)
/* 15 us command delay time */
this->legacy.chip_delay = 15;
/* set eccmode using hardware ECC */
- this->ecc.mode = NAND_ECC_HW;
+ this->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
this->ecc.size = 256;
this->ecc.bytes = 3;
this->ecc.strength = 1;
diff --git a/drivers/mtd/nand/raw/socrates_nand.c b/drivers/mtd/nand/raw/socrates_nand.c
index 243b34cfbc1b..0f63ff6f7fe7 100644
--- a/drivers/mtd/nand/raw/socrates_nand.c
+++ b/drivers/mtd/nand/raw/socrates_nand.c
@@ -153,8 +153,9 @@ static int socrates_nand_probe(struct platform_device *ofdev)
nand_chip->legacy.read_buf = socrates_nand_read_buf;
nand_chip->legacy.dev_ready = socrates_nand_device_ready;
- nand_chip->ecc.mode = NAND_ECC_SOFT; /* enable ECC */
- nand_chip->ecc.algo = NAND_ECC_HAMMING;
+ /* enable ECC */
+ nand_chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+ nand_chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
/* TODO: I have no idea what real delay is. */
nand_chip->legacy.chip_delay = 20; /* 20us command delay time */
diff --git a/drivers/mtd/nand/raw/stm32_fmc2_nand.c b/drivers/mtd/nand/raw/stm32_fmc2_nand.c
index 7f4546ae9130..b31a5818234d 100644
--- a/drivers/mtd/nand/raw/stm32_fmc2_nand.c
+++ b/drivers/mtd/nand/raw/stm32_fmc2_nand.c
@@ -1696,14 +1696,15 @@ static int stm32_fmc2_nfc_attach_chip(struct nand_chip *chip)
int ret;
/*
- * Only NAND_ECC_HW mode is actually supported
+ * Only NAND_ECC_ENGINE_TYPE_ON_HOST mode is actually supported
* Hamming => ecc.strength = 1
* BCH4 => ecc.strength = 4
* BCH8 => ecc.strength = 8
* ECC sector size = 512
*/
- if (chip->ecc.mode != NAND_ECC_HW) {
- dev_err(nfc->dev, "nand_ecc_mode is not well defined in the DT\n");
+ if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST) {
+ dev_err(nfc->dev,
+ "nand_ecc_engine_type is not well defined in the DT\n");
return -EINVAL;
}
@@ -1762,7 +1763,7 @@ static int stm32_fmc2_nfc_parse_child(struct stm32_fmc2_nfc *nfc,
return ret;
}
- if (cs > FMC2_MAX_CE) {
+ if (cs >= FMC2_MAX_CE) {
dev_err(nfc->dev, "invalid reg value: %d\n", cs);
return -EINVAL;
}
@@ -1952,7 +1953,7 @@ static int stm32_fmc2_nfc_probe(struct platform_device *pdev)
NAND_USES_DMA;
/* Default ECC settings */
- chip->ecc.mode = NAND_ECC_HW;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
chip->ecc.size = FMC2_ECC_STEP_SIZE;
chip->ecc.strength = FMC2_ECC_BCH8;
diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c
index 9c50c2b965e1..2a7ca3072f35 100644
--- a/drivers/mtd/nand/raw/sunxi_nand.c
+++ b/drivers/mtd/nand/raw/sunxi_nand.c
@@ -1575,7 +1575,7 @@ static int sunxi_nand_ooblayout_free(struct mtd_info *mtd, int section,
* only have 2 bytes available in the first user data
* section.
*/
- if (!section && ecc->mode == NAND_ECC_HW) {
+ if (!section && ecc->engine_type == NAND_ECC_ENGINE_TYPE_ON_HOST) {
oobregion->offset = 2;
oobregion->length = 2;
@@ -1609,12 +1609,13 @@ static int sunxi_nand_hw_ecc_ctrl_init(struct nand_chip *nand,
static const u8 strengths[] = { 16, 24, 28, 32, 40, 48, 56, 60, 64 };
struct sunxi_nfc *nfc = to_sunxi_nfc(nand->controller);
struct mtd_info *mtd = nand_to_mtd(nand);
+ struct nand_device *nanddev = mtd_to_nanddev(mtd);
struct sunxi_nand_hw_ecc *data;
int nsectors;
int ret;
int i;
- if (ecc->options & NAND_ECC_MAXIMIZE) {
+ if (nanddev->ecc.user_conf.flags & NAND_ECC_MAXIMIZE_STRENGTH) {
int bytes;
ecc->size = 1024;
@@ -1720,11 +1721,11 @@ err:
static void sunxi_nand_ecc_cleanup(struct nand_ecc_ctrl *ecc)
{
- switch (ecc->mode) {
- case NAND_ECC_HW:
+ switch (ecc->engine_type) {
+ case NAND_ECC_ENGINE_TYPE_ON_HOST:
sunxi_nand_hw_ecc_ctrl_cleanup(ecc);
break;
- case NAND_ECC_NONE:
+ case NAND_ECC_ENGINE_TYPE_NONE:
default:
break;
}
@@ -1732,6 +1733,8 @@ static void sunxi_nand_ecc_cleanup(struct nand_ecc_ctrl *ecc)
static int sunxi_nand_attach_chip(struct nand_chip *nand)
{
+ const struct nand_ecc_props *requirements =
+ nanddev_get_ecc_requirements(&nand->base);
struct nand_ecc_ctrl *ecc = &nand->ecc;
struct device_node *np = nand_get_flash_node(nand);
int ret;
@@ -1745,21 +1748,21 @@ static int sunxi_nand_attach_chip(struct nand_chip *nand)
nand->options |= NAND_SUBPAGE_READ;
if (!ecc->size) {
- ecc->size = nand->base.eccreq.step_size;
- ecc->strength = nand->base.eccreq.strength;
+ ecc->size = requirements->step_size;
+ ecc->strength = requirements->strength;
}
if (!ecc->size || !ecc->strength)
return -EINVAL;
- switch (ecc->mode) {
- case NAND_ECC_HW:
+ switch (ecc->engine_type) {
+ case NAND_ECC_ENGINE_TYPE_ON_HOST:
ret = sunxi_nand_hw_ecc_ctrl_init(nand, ecc, np);
if (ret)
return ret;
break;
- case NAND_ECC_NONE:
- case NAND_ECC_SOFT:
+ case NAND_ECC_ENGINE_TYPE_NONE:
+ case NAND_ECC_ENGINE_TYPE_SOFT:
break;
default:
return -EINVAL;
@@ -1991,7 +1994,7 @@ static int sunxi_nand_chip_init(struct device *dev, struct sunxi_nfc *nfc,
* Set the ECC mode to the default value in case nothing is specified
* in the DT.
*/
- nand->ecc.mode = NAND_ECC_HW;
+ nand->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
nand_set_flash_node(nand, np);
mtd = nand_to_mtd(nand);
diff --git a/drivers/mtd/nand/raw/tango_nand.c b/drivers/mtd/nand/raw/tango_nand.c
index bdb965ae7a4a..359187b5a4be 100644
--- a/drivers/mtd/nand/raw/tango_nand.c
+++ b/drivers/mtd/nand/raw/tango_nand.c
@@ -549,8 +549,8 @@ static int tango_attach_chip(struct nand_chip *chip)
{
struct nand_ecc_ctrl *ecc = &chip->ecc;
- ecc->mode = NAND_ECC_HW;
- ecc->algo = NAND_ECC_BCH;
+ ecc->engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
+ ecc->algo = NAND_ECC_ALGO_BCH;
ecc->bytes = DIV_ROUND_UP(ecc->strength * FIELD_ORDER, BITS_PER_BYTE);
ecc->read_page_raw = tango_read_page_raw;
diff --git a/drivers/mtd/nand/raw/tegra_nand.c b/drivers/mtd/nand/raw/tegra_nand.c
index 6b6212ffa01c..fbf67722a049 100644
--- a/drivers/mtd/nand/raw/tegra_nand.c
+++ b/drivers/mtd/nand/raw/tegra_nand.c
@@ -479,7 +479,7 @@ static void tegra_nand_hw_ecc(struct tegra_nand_controller *ctrl,
{
struct tegra_nand_chip *nand = to_tegra_chip(chip);
- if (chip->ecc.algo == NAND_ECC_BCH && enable)
+ if (chip->ecc.algo == NAND_ECC_ALGO_BCH && enable)
writel_relaxed(nand->bch_config, ctrl->regs + BCH_CONFIG);
else
writel_relaxed(0, ctrl->regs + BCH_CONFIG);
@@ -840,7 +840,10 @@ static int tegra_nand_get_strength(struct nand_chip *chip, const int *strength,
int strength_len, int bits_per_step,
int oobsize)
{
- bool maximize = chip->ecc.options & NAND_ECC_MAXIMIZE;
+ struct nand_device *base = mtd_to_nanddev(nand_to_mtd(chip));
+ const struct nand_ecc_props *requirements =
+ nanddev_get_ecc_requirements(base);
+ bool maximize = base->ecc.user_conf.flags & NAND_ECC_MAXIMIZE_STRENGTH;
int i;
/*
@@ -855,7 +858,7 @@ static int tegra_nand_get_strength(struct nand_chip *chip, const int *strength,
} else {
strength_sel = strength[i];
- if (strength_sel < chip->base.eccreq.strength)
+ if (strength_sel < requirements->strength)
continue;
}
@@ -877,7 +880,7 @@ static int tegra_nand_select_strength(struct nand_chip *chip, int oobsize)
int strength_len, bits_per_step;
switch (chip->ecc.algo) {
- case NAND_ECC_RS:
+ case NAND_ECC_ALGO_RS:
bits_per_step = BITS_PER_STEP_RS;
if (chip->options & NAND_IS_BOOT_MEDIUM) {
strength = rs_strength_bootable;
@@ -887,7 +890,7 @@ static int tegra_nand_select_strength(struct nand_chip *chip, int oobsize)
strength_len = ARRAY_SIZE(rs_strength);
}
break;
- case NAND_ECC_BCH:
+ case NAND_ECC_ALGO_BCH:
bits_per_step = BITS_PER_STEP_BCH;
if (chip->options & NAND_IS_BOOT_MEDIUM) {
strength = bch_strength_bootable;
@@ -908,6 +911,8 @@ static int tegra_nand_select_strength(struct nand_chip *chip, int oobsize)
static int tegra_nand_attach_chip(struct nand_chip *chip)
{
struct tegra_nand_controller *ctrl = to_tegra_ctrl(chip->controller);
+ const struct nand_ecc_props *requirements =
+ nanddev_get_ecc_requirements(&chip->base);
struct tegra_nand_chip *nand = to_tegra_chip(chip);
struct mtd_info *mtd = nand_to_mtd(chip);
int bits_per_step;
@@ -916,12 +921,12 @@ static int tegra_nand_attach_chip(struct nand_chip *chip)
if (chip->bbt_options & NAND_BBT_USE_FLASH)
chip->bbt_options |= NAND_BBT_NO_OOB;
- chip->ecc.mode = NAND_ECC_HW;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
chip->ecc.size = 512;
chip->ecc.steps = mtd->writesize / chip->ecc.size;
- if (chip->base.eccreq.step_size != 512) {
+ if (requirements->step_size != 512) {
dev_err(ctrl->dev, "Unsupported step size %d\n",
- chip->base.eccreq.step_size);
+ requirements->step_size);
return -EINVAL;
}
@@ -935,14 +940,14 @@ static int tegra_nand_attach_chip(struct nand_chip *chip)
if (chip->options & NAND_BUSWIDTH_16)
nand->config |= CONFIG_BUS_WIDTH_16;
- if (chip->ecc.algo == NAND_ECC_UNKNOWN) {
+ if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) {
if (mtd->writesize < 2048)
- chip->ecc.algo = NAND_ECC_RS;
+ chip->ecc.algo = NAND_ECC_ALGO_RS;
else
- chip->ecc.algo = NAND_ECC_BCH;
+ chip->ecc.algo = NAND_ECC_ALGO_BCH;
}
- if (chip->ecc.algo == NAND_ECC_BCH && mtd->writesize < 2048) {
+ if (chip->ecc.algo == NAND_ECC_ALGO_BCH && mtd->writesize < 2048) {
dev_err(ctrl->dev, "BCH supports 2K or 4K page size only\n");
return -EINVAL;
}
@@ -952,7 +957,7 @@ static int tegra_nand_attach_chip(struct nand_chip *chip)
if (ret < 0) {
dev_err(ctrl->dev,
"No valid strength found, minimum %d\n",
- chip->base.eccreq.strength);
+ requirements->strength);
return ret;
}
@@ -963,7 +968,7 @@ static int tegra_nand_attach_chip(struct nand_chip *chip)
CONFIG_SKIP_SPARE_SIZE_4;
switch (chip->ecc.algo) {
- case NAND_ECC_RS:
+ case NAND_ECC_ALGO_RS:
bits_per_step = BITS_PER_STEP_RS * chip->ecc.strength;
mtd_set_ooblayout(mtd, &tegra_nand_oob_rs_ops);
nand->config_ecc |= CONFIG_HW_ECC | CONFIG_ECC_SEL |
@@ -984,7 +989,7 @@ static int tegra_nand_attach_chip(struct nand_chip *chip)
return -EINVAL;
}
break;
- case NAND_ECC_BCH:
+ case NAND_ECC_ALGO_BCH:
bits_per_step = BITS_PER_STEP_BCH * chip->ecc.strength;
mtd_set_ooblayout(mtd, &tegra_nand_oob_bch_ops);
nand->bch_config = BCH_ENABLE;
@@ -1013,7 +1018,7 @@ static int tegra_nand_attach_chip(struct nand_chip *chip)
}
dev_info(ctrl->dev, "Using %s with strength %d per 512 byte step\n",
- chip->ecc.algo == NAND_ECC_BCH ? "BCH" : "RS",
+ chip->ecc.algo == NAND_ECC_ALGO_BCH ? "BCH" : "RS",
chip->ecc.strength);
chip->ecc.bytes = DIV_ROUND_UP(bits_per_step, BITS_PER_BYTE);
diff --git a/drivers/mtd/nand/raw/tmio_nand.c b/drivers/mtd/nand/raw/tmio_nand.c
index 843a8683b737..235a2f7b1bad 100644
--- a/drivers/mtd/nand/raw/tmio_nand.c
+++ b/drivers/mtd/nand/raw/tmio_nand.c
@@ -410,7 +410,7 @@ static int tmio_probe(struct platform_device *dev)
nand_chip->legacy.read_buf = tmio_nand_read_buf;
/* set eccmode using hardware ECC */
- nand_chip->ecc.mode = NAND_ECC_HW;
+ nand_chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
nand_chip->ecc.size = 512;
nand_chip->ecc.bytes = 6;
nand_chip->ecc.strength = 2;
diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c
index 47d966871445..ef81dce6b5c4 100644
--- a/drivers/mtd/nand/raw/txx9ndfmc.c
+++ b/drivers/mtd/nand/raw/txx9ndfmc.c
@@ -329,7 +329,7 @@ static int __init txx9ndfmc_probe(struct platform_device *dev)
chip->ecc.calculate = txx9ndfmc_calculate_ecc;
chip->ecc.correct = txx9ndfmc_correct_data;
chip->ecc.hwctl = txx9ndfmc_enable_hwecc;
- chip->ecc.mode = NAND_ECC_HW;
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
chip->ecc.strength = 1;
chip->legacy.chip_delay = 100;
chip->controller = &drvdata->controller;
diff --git a/drivers/mtd/nand/raw/vf610_nfc.c b/drivers/mtd/nand/raw/vf610_nfc.c
index 7248c5901183..40d70f991d89 100644
--- a/drivers/mtd/nand/raw/vf610_nfc.c
+++ b/drivers/mtd/nand/raw/vf610_nfc.c
@@ -323,11 +323,6 @@ static inline void vf610_nfc_ecc_mode(struct vf610_nfc *nfc, int ecc_mode)
CONFIG_ECC_MODE_SHIFT, ecc_mode);
}
-static inline void vf610_nfc_transfer_size(struct vf610_nfc *nfc, int size)
-{
- vf610_nfc_write(nfc, NFC_SECTOR_SIZE, size);
-}
-
static inline void vf610_nfc_run(struct vf610_nfc *nfc, u32 col, u32 row,
u32 cmd1, u32 cmd2, u32 trfr_sz)
{
@@ -732,7 +727,7 @@ static void vf610_nfc_init_controller(struct vf610_nfc *nfc)
else
vf610_nfc_clear(nfc, NFC_FLASH_CONFIG, CONFIG_16BIT);
- if (nfc->chip.ecc.mode == NAND_ECC_HW) {
+ if (nfc->chip.ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_HOST) {
/* Set ECC status offset in SRAM */
vf610_nfc_set_field(nfc, NFC_FLASH_CONFIG,
CONFIG_ECC_SRAM_ADDR_MASK,
@@ -761,7 +756,7 @@ static int vf610_nfc_attach_chip(struct nand_chip *chip)
return -ENXIO;
}
- if (chip->ecc.mode != NAND_ECC_HW)
+ if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST)
return 0;
if (mtd->writesize != PAGE_2K && mtd->oobsize < 64) {
@@ -779,7 +774,7 @@ static int vf610_nfc_attach_chip(struct nand_chip *chip)
mtd->oobsize = 64;
/* Use default large page ECC layout defined in NAND core */
- mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops);
+ mtd_set_ooblayout(mtd, nand_get_large_page_ooblayout());
if (chip->ecc.strength == 32) {
nfc->ecc_mode = ECC_60_BYTE;
chip->ecc.bytes = 60;
@@ -852,8 +847,10 @@ static int vf610_nfc_probe(struct platform_device *pdev)
}
of_id = of_match_device(vf610_nfc_dt_ids, &pdev->dev);
- if (!of_id)
- return -ENODEV;
+ if (!of_id) {
+ err = -ENODEV;
+ goto err_disable_clk;
+ }
nfc->variant = (enum vf610_nfc_variant)of_id->data;
diff --git a/drivers/mtd/nand/raw/xway_nand.c b/drivers/mtd/nand/raw/xway_nand.c
index 29255476afdb..f2dbd63a5c1f 100644
--- a/drivers/mtd/nand/raw/xway_nand.c
+++ b/drivers/mtd/nand/raw/xway_nand.c
@@ -180,8 +180,8 @@ static int xway_nand_probe(struct platform_device *pdev)
data->chip.legacy.read_byte = xway_read_byte;
data->chip.legacy.chip_delay = 30;
- data->chip.ecc.mode = NAND_ECC_SOFT;
- data->chip.ecc.algo = NAND_ECC_HAMMING;
+ data->chip.ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+ data->chip.ecc.algo = NAND_ECC_ALGO_HAMMING;
platform_set_drvdata(pdev, data);
nand_set_controller_data(&data->chip, data);
diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c
index e2c382ffc5b6..c35221794645 100644
--- a/drivers/mtd/nand/spi/core.c
+++ b/drivers/mtd/nand/spi/core.c
@@ -419,7 +419,7 @@ static int spinand_check_ecc_status(struct spinand_device *spinand, u8 status)
* fixed, so let's return the maximum possible value so that
* wear-leveling layers move the data immediately.
*/
- return nand->eccreq.strength;
+ return nanddev_get_ecc_conf(nand)->strength;
case STATUS_ECC_UNCOR_ERROR:
return -EBADMSG;
@@ -497,7 +497,7 @@ static int spinand_mtd_read(struct mtd_info *mtd, loff_t from,
mutex_lock(&spinand->lock);
- nanddev_io_for_each_page(nand, from, ops, &iter) {
+ nanddev_io_for_each_page(nand, NAND_PAGE_READ, from, ops, &iter) {
ret = spinand_select_target(spinand, iter.req.pos.target);
if (ret)
break;
@@ -545,7 +545,7 @@ static int spinand_mtd_write(struct mtd_info *mtd, loff_t to,
mutex_lock(&spinand->lock);
- nanddev_io_for_each_page(nand, to, ops, &iter) {
+ nanddev_io_for_each_page(nand, NAND_PAGE_WRITE, to, ops, &iter) {
ret = spinand_select_target(spinand, iter.req.pos.target);
if (ret)
break;
@@ -902,7 +902,7 @@ int spinand_match_and_init(struct spinand_device *spinand,
continue;
nand->memorg = table[i].memorg;
- nand->eccreq = table[i].eccreq;
+ nanddev_set_ecc_requirements(nand, &table[i].eccreq);
spinand->eccinfo = table[i].eccinfo;
spinand->flags = table[i].flags;
spinand->id.len = 1 + table[i].devid.len;
@@ -1090,8 +1090,8 @@ static int spinand_init(struct spinand_device *spinand)
mtd->oobavail = ret;
/* Propagate ECC information to mtd_info */
- mtd->ecc_strength = nand->eccreq.strength;
- mtd->ecc_step_size = nand->eccreq.step_size;
+ mtd->ecc_strength = nanddev_get_ecc_conf(nand)->strength;
+ mtd->ecc_step_size = nanddev_get_ecc_conf(nand)->step_size;
return 0;
diff --git a/drivers/mtd/nand/spi/gigadevice.c b/drivers/mtd/nand/spi/gigadevice.c
index d219c970042a..33c67403c4aa 100644
--- a/drivers/mtd/nand/spi/gigadevice.c
+++ b/drivers/mtd/nand/spi/gigadevice.c
@@ -21,7 +21,7 @@
#define GD5FXGQ4UXFXXG_STATUS_ECC_UNCOR_ERROR (7 << 4)
static SPINAND_OP_VARIANTS(read_cache_variants,
- SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 2, NULL, 0),
+ SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 1, NULL, 0),
SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0),
SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(0, 1, NULL, 0),
SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0),
@@ -29,7 +29,7 @@ static SPINAND_OP_VARIANTS(read_cache_variants,
SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0));
static SPINAND_OP_VARIANTS(read_cache_variants_f,
- SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 2, NULL, 0),
+ SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 1, NULL, 0),
SPINAND_PAGE_READ_FROM_CACHE_X4_OP_3A(0, 1, NULL, 0),
SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(0, 1, NULL, 0),
SPINAND_PAGE_READ_FROM_CACHE_X2_OP_3A(0, 1, NULL, 0),
@@ -132,6 +132,35 @@ static const struct mtd_ooblayout_ops gd5fxgq4_variant2_ooblayout = {
.free = gd5fxgq4_variant2_ooblayout_free,
};
+static int gd5fxgq4xc_ooblayout_256_ecc(struct mtd_info *mtd, int section,
+ struct mtd_oob_region *oobregion)
+{
+ if (section)
+ return -ERANGE;
+
+ oobregion->offset = 128;
+ oobregion->length = 128;
+
+ return 0;
+}
+
+static int gd5fxgq4xc_ooblayout_256_free(struct mtd_info *mtd, int section,
+ struct mtd_oob_region *oobregion)
+{
+ if (section)
+ return -ERANGE;
+
+ oobregion->offset = 1;
+ oobregion->length = 127;
+
+ return 0;
+}
+
+static const struct mtd_ooblayout_ops gd5fxgq4xc_oob_256_ops = {
+ .ecc = gd5fxgq4xc_ooblayout_256_ecc,
+ .free = gd5fxgq4xc_ooblayout_256_free,
+};
+
static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand,
u8 status)
{
@@ -202,7 +231,7 @@ static const struct spinand_info gigadevice_spinand_table[] = {
SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
&write_cache_variants,
&update_cache_variants),
- 0,
+ SPINAND_HAS_QE_BIT,
SPINAND_ECCINFO(&gd5fxgq4xa_ooblayout,
gd5fxgq4xa_ecc_get_status)),
SPINAND_INFO("GD5F2GQ4xA",
@@ -212,7 +241,7 @@ static const struct spinand_info gigadevice_spinand_table[] = {
SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
&write_cache_variants,
&update_cache_variants),
- 0,
+ SPINAND_HAS_QE_BIT,
SPINAND_ECCINFO(&gd5fxgq4xa_ooblayout,
gd5fxgq4xa_ecc_get_status)),
SPINAND_INFO("GD5F4GQ4xA",
@@ -222,9 +251,29 @@ static const struct spinand_info gigadevice_spinand_table[] = {
SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
&write_cache_variants,
&update_cache_variants),
- 0,
+ SPINAND_HAS_QE_BIT,
SPINAND_ECCINFO(&gd5fxgq4xa_ooblayout,
gd5fxgq4xa_ecc_get_status)),
+ SPINAND_INFO("GD5F4GQ4RC",
+ SPINAND_ID(SPINAND_READID_METHOD_OPCODE, 0xa4, 0x68),
+ NAND_MEMORG(1, 4096, 256, 64, 2048, 40, 1, 1, 1),
+ NAND_ECCREQ(8, 512),
+ SPINAND_INFO_OP_VARIANTS(&read_cache_variants_f,
+ &write_cache_variants,
+ &update_cache_variants),
+ SPINAND_HAS_QE_BIT,
+ SPINAND_ECCINFO(&gd5fxgq4xc_oob_256_ops,
+ gd5fxgq4ufxxg_ecc_get_status)),
+ SPINAND_INFO("GD5F4GQ4UC",
+ SPINAND_ID(SPINAND_READID_METHOD_OPCODE, 0xb4, 0x68),
+ NAND_MEMORG(1, 4096, 256, 64, 2048, 40, 1, 1, 1),
+ NAND_ECCREQ(8, 512),
+ SPINAND_INFO_OP_VARIANTS(&read_cache_variants_f,
+ &write_cache_variants,
+ &update_cache_variants),
+ SPINAND_HAS_QE_BIT,
+ SPINAND_ECCINFO(&gd5fxgq4xc_oob_256_ops,
+ gd5fxgq4ufxxg_ecc_get_status)),
SPINAND_INFO("GD5F1GQ4UExxG",
SPINAND_ID(SPINAND_READID_METHOD_OPCODE_ADDR, 0xd1),
NAND_MEMORG(1, 2048, 128, 64, 1024, 20, 1, 1, 1),
@@ -232,7 +281,7 @@ static const struct spinand_info gigadevice_spinand_table[] = {
SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
&write_cache_variants,
&update_cache_variants),
- 0,
+ SPINAND_HAS_QE_BIT,
SPINAND_ECCINFO(&gd5fxgq4_variant2_ooblayout,
gd5fxgq4uexxg_ecc_get_status)),
SPINAND_INFO("GD5F1GQ4UFxxG",
@@ -242,7 +291,7 @@ static const struct spinand_info gigadevice_spinand_table[] = {
SPINAND_INFO_OP_VARIANTS(&read_cache_variants_f,
&write_cache_variants,
&update_cache_variants),
- 0,
+ SPINAND_HAS_QE_BIT,
SPINAND_ECCINFO(&gd5fxgq4_variant2_ooblayout,
gd5fxgq4ufxxg_ecc_get_status)),
};
diff --git a/drivers/mtd/nand/spi/macronix.c b/drivers/mtd/nand/spi/macronix.c
index 0f900f3aa21a..8e801e4c3a00 100644
--- a/drivers/mtd/nand/spi/macronix.c
+++ b/drivers/mtd/nand/spi/macronix.c
@@ -84,10 +84,11 @@ static int mx35lf1ge4ab_ecc_get_status(struct spinand_device *spinand,
* data around if it's not necessary.
*/
if (mx35lf1ge4ab_get_eccsr(spinand, &eccsr))
- return nand->eccreq.strength;
+ return nanddev_get_ecc_conf(nand)->strength;
- if (WARN_ON(eccsr > nand->eccreq.strength || !eccsr))
- return nand->eccreq.strength;
+ if (WARN_ON(eccsr > nanddev_get_ecc_conf(nand)->strength ||
+ !eccsr))
+ return nanddev_get_ecc_conf(nand)->strength;
return eccsr;
@@ -118,6 +119,26 @@ static const struct spinand_info macronix_spinand_table[] = {
&update_cache_variants),
SPINAND_HAS_QE_BIT,
SPINAND_ECCINFO(&mx35lfxge4ab_ooblayout, NULL)),
+ SPINAND_INFO("MX31LF1GE4BC",
+ SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x1e),
+ NAND_MEMORG(1, 2048, 64, 64, 1024, 20, 1, 1, 1),
+ NAND_ECCREQ(8, 512),
+ SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
+ &write_cache_variants,
+ &update_cache_variants),
+ 0 /*SPINAND_HAS_QE_BIT*/,
+ SPINAND_ECCINFO(&mx35lfxge4ab_ooblayout,
+ mx35lf1ge4ab_ecc_get_status)),
+ SPINAND_INFO("MX31UF1GE4BC",
+ SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x9e),
+ NAND_MEMORG(1, 2048, 64, 64, 1024, 20, 1, 1, 1),
+ NAND_ECCREQ(8, 512),
+ SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
+ &write_cache_variants,
+ &update_cache_variants),
+ 0 /*SPINAND_HAS_QE_BIT*/,
+ SPINAND_ECCINFO(&mx35lfxge4ab_ooblayout,
+ mx35lf1ge4ab_ecc_get_status)),
};
static const struct spinand_manufacturer_ops macronix_spinand_manuf_ops = {
diff --git a/drivers/mtd/nand/spi/toshiba.c b/drivers/mtd/nand/spi/toshiba.c
index bc801d83343e..21fde2875674 100644
--- a/drivers/mtd/nand/spi/toshiba.c
+++ b/drivers/mtd/nand/spi/toshiba.c
@@ -90,12 +90,12 @@ static int tx58cxgxsxraix_ecc_get_status(struct spinand_device *spinand,
* data around if it's not necessary.
*/
if (spi_mem_exec_op(spinand->spimem, &op))
- return nand->eccreq.strength;
+ return nanddev_get_ecc_conf(nand)->strength;
mbf >>= 4;
- if (WARN_ON(mbf > nand->eccreq.strength || !mbf))
- return nand->eccreq.strength;
+ if (WARN_ON(mbf > nanddev_get_ecc_conf(nand)->strength || !mbf))
+ return nanddev_get_ecc_conf(nand)->strength;
return mbf;
diff --git a/drivers/mtd/parsers/Kconfig b/drivers/mtd/parsers/Kconfig
index f98363c9b363..e72354322f62 100644
--- a/drivers/mtd/parsers/Kconfig
+++ b/drivers/mtd/parsers/Kconfig
@@ -12,7 +12,7 @@ config MTD_BCM47XX_PARTS
boards.
config MTD_BCM63XX_PARTS
- tristate "BCM63XX CFE partitioning parser"
+ bool "BCM63XX CFE partitioning parser"
depends on BCM63XX || BMIPS_GENERIC || COMPILE_TEST
select CRC32
select MTD_PARSER_IMAGETAG
diff --git a/drivers/mtd/spi-nor/controllers/intel-spi-pci.c b/drivers/mtd/spi-nor/controllers/intel-spi-pci.c
index c72aa1ab71ad..555fe55d14ae 100644
--- a/drivers/mtd/spi-nor/controllers/intel-spi-pci.c
+++ b/drivers/mtd/spi-nor/controllers/intel-spi-pci.c
@@ -73,6 +73,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = {
{ PCI_VDEVICE(INTEL, 0x43a4), (unsigned long)&cnl_info },
{ PCI_VDEVICE(INTEL, 0x4b24), (unsigned long)&bxt_info },
{ PCI_VDEVICE(INTEL, 0x4da4), (unsigned long)&bxt_info },
+ { PCI_VDEVICE(INTEL, 0x7aa4), (unsigned long)&cnl_info },
{ PCI_VDEVICE(INTEL, 0xa0a4), (unsigned long)&bxt_info },
{ PCI_VDEVICE(INTEL, 0xa1a4), (unsigned long)&bxt_info },
{ PCI_VDEVICE(INTEL, 0xa224), (unsigned long)&bxt_info },
diff --git a/drivers/mtd/spi-nor/macronix.c b/drivers/mtd/spi-nor/macronix.c
index f97f3d127575..9203abaac229 100644
--- a/drivers/mtd/spi-nor/macronix.c
+++ b/drivers/mtd/spi-nor/macronix.c
@@ -50,7 +50,7 @@ static const struct flash_info macronix_parts[] = {
{ "mx25u4035", INFO(0xc22533, 0, 64 * 1024, 8, SECT_4K) },
{ "mx25u8035", INFO(0xc22534, 0, 64 * 1024, 16, SECT_4K) },
{ "mx25u6435f", INFO(0xc22537, 0, 64 * 1024, 128, SECT_4K) },
- { "mx25l12805d", INFO(0xc22018, 0, 64 * 1024, 256, 0) },
+ { "mx25l12805d", INFO(0xc22018, 0, 64 * 1024, 256, SECT_4K) },
{ "mx25l12855e", INFO(0xc22618, 0, 64 * 1024, 256, 0) },
{ "mx25r1635f", INFO(0xc22815, 0, 64 * 1024, 32,
SECT_4K | SPI_NOR_DUAL_READ |
diff --git a/drivers/mtd/spi-nor/winbond.c b/drivers/mtd/spi-nor/winbond.c
index 6dcde15fb1aa..e5dfa786f190 100644
--- a/drivers/mtd/spi-nor/winbond.c
+++ b/drivers/mtd/spi-nor/winbond.c
@@ -63,6 +63,15 @@ static const struct flash_info winbond_parts[] = {
{ "w25q32jwm", INFO(0xef8016, 0, 64 * 1024, 64,
SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ |
SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB) },
+ { "w25q64jwm", INFO(0xef8017, 0, 64 * 1024, 128,
+ SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ |
+ SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB) },
+ { "w25q128jwm", INFO(0xef8018, 0, 64 * 1024, 256,
+ SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ |
+ SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB) },
+ { "w25q256jwm", INFO(0xef8019, 0, 64 * 1024, 512,
+ SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ |
+ SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB) },
{ "w25x64", INFO(0xef3017, 0, 64 * 1024, 128, SECT_4K) },
{ "w25q64", INFO(0xef4017, 0, 64 * 1024, 128,
SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index 42cac572f82d..7847de75a74c 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -1639,6 +1639,19 @@ int ubi_thread(void *u)
!ubi->thread_enabled || ubi_dbg_is_bgt_disabled(ubi)) {
set_current_state(TASK_INTERRUPTIBLE);
spin_unlock(&ubi->wl_lock);
+
+ /*
+ * Check kthread_should_stop() after we set the task
+ * state to guarantee that we either see the stop bit
+ * and exit or the task state is reset to runnable such
+ * that it's not scheduled out indefinitely and detects
+ * the stop bit at kthread_should_stop().
+ */
+ if (kthread_should_stop()) {
+ set_current_state(TASK_RUNNING);
+ break;
+ }
+
schedule();
continue;
}
diff --git a/drivers/net/appletalk/Kconfig b/drivers/net/appletalk/Kconfig
index d4f22a2e5be4..43918398f0d3 100644
--- a/drivers/net/appletalk/Kconfig
+++ b/drivers/net/appletalk/Kconfig
@@ -48,7 +48,7 @@ config LTPC
If you are in doubt, this card is the one with the 65C02 chip on it.
You also need version 1.3.3 or later of the netatalk package.
This driver is experimental, which means that it may not work.
- See the file <file:Documentation/networking/ltpc.rst>.
+ See the file <file:Documentation/networking/device_drivers/appletalk/ltpc.rst>.
config COPS
tristate "COPS LocalTalk PC support"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
index 1eef66ee849e..cac8f085b16d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
@@ -130,14 +130,6 @@ static int mlx5i_flash_device(struct net_device *netdev,
return mlx5e_ethtool_flash_device(priv, flash);
}
-enum mlx5_ptys_width {
- MLX5_PTYS_WIDTH_1X = 1 << 0,
- MLX5_PTYS_WIDTH_2X = 1 << 1,
- MLX5_PTYS_WIDTH_4X = 1 << 2,
- MLX5_PTYS_WIDTH_8X = 1 << 3,
- MLX5_PTYS_WIDTH_12X = 1 << 4,
-};
-
static inline int mlx5_ptys_width_enum_to_int(enum mlx5_ptys_width width)
{
switch (width) {
@@ -174,24 +166,6 @@ static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate)
}
}
-static int mlx5i_get_port_settings(struct net_device *netdev,
- u16 *ib_link_width_oper, u16 *ib_proto_oper)
-{
- struct mlx5e_priv *priv = mlx5i_epriv(netdev);
- struct mlx5_core_dev *mdev = priv->mdev;
- u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
- int ret;
-
- ret = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_IB, 1);
- if (ret)
- return ret;
-
- *ib_link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper);
- *ib_proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper);
-
- return 0;
-}
-
static int mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper)
{
int rate, width;
@@ -209,11 +183,14 @@ static int mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper)
static int mlx5i_get_link_ksettings(struct net_device *netdev,
struct ethtool_link_ksettings *link_ksettings)
{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
u16 ib_link_width_oper;
u16 ib_proto_oper;
int speed, ret;
- ret = mlx5i_get_port_settings(netdev, &ib_link_width_oper, &ib_proto_oper);
+ ret = mlx5_query_ib_port_oper(mdev, &ib_link_width_oper, &ib_proto_oper,
+ 1);
if (ret)
return ret;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index e4186e84b3ff..4bb219565c58 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -154,24 +154,8 @@ int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration)
sizeof(out), MLX5_REG_MLCR, 0, 1);
}
-int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev,
- u8 *link_width_oper, u8 local_port)
-{
- u32 out[MLX5_ST_SZ_DW(ptys_reg)];
- int err;
-
- err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_IB, local_port);
- if (err)
- return err;
-
- *link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper);
-
-int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
- u8 *proto_oper, u8 local_port)
+int mlx5_query_ib_port_oper(struct mlx5_core_dev *dev, u16 *link_width_oper,
+ u16 *proto_oper, u8 local_port)
{
u32 out[MLX5_ST_SZ_DW(ptys_reg)];
int err;
@@ -181,11 +165,12 @@ int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
if (err)
return err;
+ *link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper);
*proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper);
return 0;
}
-EXPORT_SYMBOL(mlx5_query_port_ib_proto_oper);
+EXPORT_SYMBOL(mlx5_query_ib_port_oper);
/* This function should be used after setting a port register only */
void mlx5_toggle_port_link(struct mlx5_core_dev *dev)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index d3136556a1e9..da864d12916b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -504,7 +504,8 @@ static void qed_rdma_init_devinfo(struct qed_hwfn *p_hwfn,
dev->max_mw = 0;
dev->max_mr_mw_fmr_pbl = (PAGE_SIZE / 8) * (PAGE_SIZE / 8);
dev->max_mr_mw_fmr_size = dev->max_mr_mw_fmr_pbl * PAGE_SIZE;
- dev->max_pkey = QED_RDMA_MAX_P_KEY;
+ if (QED_IS_ROCE_PERSONALITY(p_hwfn))
+ dev->max_pkey = QED_RDMA_MAX_P_KEY;
dev->max_srq = p_hwfn->p_rdma_info->num_srqs;
dev->max_srq_wr = QED_RDMA_MAX_SRQ_WQE_ELEM;
@@ -1519,7 +1520,7 @@ qed_rdma_register_tid(void *rdma_cxt,
params->pbl_two_level);
SET_FIELD(flags, RDMA_REGISTER_TID_RAMROD_DATA_ZERO_BASED,
- params->zbva);
+ false);
SET_FIELD(flags, RDMA_REGISTER_TID_RAMROD_DATA_PHY_MR, params->phy_mr);
@@ -1581,15 +1582,7 @@ qed_rdma_register_tid(void *rdma_cxt,
p_ramrod->pd = cpu_to_le16(params->pd);
p_ramrod->length_hi = (u8)(params->length >> 32);
p_ramrod->length_lo = DMA_LO_LE(params->length);
- if (params->zbva) {
- /* Lower 32 bits of the registered MR address.
- * In case of zero based MR, will hold FBO
- */
- p_ramrod->va.hi = 0;
- p_ramrod->va.lo = cpu_to_le32(params->fbo);
- } else {
- DMA_REGPAIR_LE(p_ramrod->va, params->vaddr);
- }
+ DMA_REGPAIR_LE(p_ramrod->va, params->vaddr);
DMA_REGPAIR_LE(p_ramrod->pbl_base, params->pbl_ptr);
/* DIF */
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index b9aa6384563b..bedbb85a179a 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -1026,7 +1026,9 @@ int qede_change_mtu(struct net_device *ndev, int new_mtu)
args.u.mtu = new_mtu;
args.func = &qede_update_mtu;
qede_reload(edev, &args, false);
-
+#if IS_ENABLED(CONFIG_QED_RDMA)
+ qede_rdma_event_change_mtu(edev);
+#endif
edev->ops->common->update_mtu(edev->cdev, new_mtu);
return 0;
diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c
index 769ec2f4d0b7..2f6598086d9b 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_rdma.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c
@@ -234,6 +234,15 @@ static void qede_rdma_changeaddr(struct qede_dev *edev)
qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_CHANGE_ADDR);
}
+static void qede_rdma_change_mtu(struct qede_dev *edev)
+{
+ if (qede_rdma_supported(edev)) {
+ if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify)
+ qedr_drv->notify(edev->rdma_info.qedr_dev,
+ QEDE_CHANGE_MTU);
+ }
+}
+
static struct qede_rdma_event_work *
qede_rdma_get_free_event_node(struct qede_dev *edev)
{
@@ -287,6 +296,9 @@ static void qede_rdma_handle_event(struct work_struct *work)
case QEDE_CHANGE_ADDR:
qede_rdma_changeaddr(edev);
break;
+ case QEDE_CHANGE_MTU:
+ qede_rdma_change_mtu(edev);
+ break;
default:
DP_NOTICE(edev, "Invalid rdma event %d", event);
}
@@ -338,3 +350,8 @@ void qede_rdma_event_changeaddr(struct qede_dev *edev)
{
qede_rdma_add_event(edev, QEDE_CHANGE_ADDR);
}
+
+void qede_rdma_event_change_mtu(struct qede_dev *edev)
+{
+ qede_rdma_add_event(edev, QEDE_CHANGE_MTU);
+}
diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c
index 1e915871baa7..36eeb80406f2 100644
--- a/drivers/net/hamradio/scc.c
+++ b/drivers/net/hamradio/scc.c
@@ -7,7 +7,7 @@
* ------------------
*
* You can find a subset of the documentation in
- * Documentation/networking/device_drivers/wan/z8530drv.rst.
+ * Documentation/networking/device_drivers/hamradio/z8530drv.rst.
*/
/*
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index ae477f7756af..8ee24e351bdc 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -140,6 +140,20 @@ struct xenvif_queue { /* Per-queue data for xenvif */
char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */
struct xenvif *vif; /* Parent VIF */
+ /*
+ * TX/RX common EOI handling.
+ * When feature-split-event-channels = 0, interrupt handler sets
+ * NETBK_COMMON_EOI, otherwise NETBK_RX_EOI and NETBK_TX_EOI are set
+ * by the RX and TX interrupt handlers.
+ * RX and TX handler threads will issue an EOI when either
+ * NETBK_COMMON_EOI or their specific bits (NETBK_RX_EOI or
+ * NETBK_TX_EOI) are set and they will reset those bits.
+ */
+ atomic_t eoi_pending;
+#define NETBK_RX_EOI 0x01
+#define NETBK_TX_EOI 0x02
+#define NETBK_COMMON_EOI 0x04
+
/* Use NAPI for guest TX */
struct napi_struct napi;
/* When feature-split-event-channels = 0, tx_irq = rx_irq. */
@@ -378,6 +392,7 @@ int xenvif_dealloc_kthread(void *data);
irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data);
+bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread);
void xenvif_rx_action(struct xenvif_queue *queue);
void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 8af497285691..acb786d8b1d8 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -77,12 +77,28 @@ int xenvif_schedulable(struct xenvif *vif)
!vif->disabled;
}
+static bool xenvif_handle_tx_interrupt(struct xenvif_queue *queue)
+{
+ bool rc;
+
+ rc = RING_HAS_UNCONSUMED_REQUESTS(&queue->tx);
+ if (rc)
+ napi_schedule(&queue->napi);
+ return rc;
+}
+
static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
{
struct xenvif_queue *queue = dev_id;
+ int old;
- if (RING_HAS_UNCONSUMED_REQUESTS(&queue->tx))
- napi_schedule(&queue->napi);
+ old = atomic_fetch_or(NETBK_TX_EOI, &queue->eoi_pending);
+ WARN(old & NETBK_TX_EOI, "Interrupt while EOI pending\n");
+
+ if (!xenvif_handle_tx_interrupt(queue)) {
+ atomic_andnot(NETBK_TX_EOI, &queue->eoi_pending);
+ xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
+ }
return IRQ_HANDLED;
}
@@ -116,19 +132,46 @@ static int xenvif_poll(struct napi_struct *napi, int budget)
return work_done;
}
+static bool xenvif_handle_rx_interrupt(struct xenvif_queue *queue)
+{
+ bool rc;
+
+ rc = xenvif_have_rx_work(queue, false);
+ if (rc)
+ xenvif_kick_thread(queue);
+ return rc;
+}
+
static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
{
struct xenvif_queue *queue = dev_id;
+ int old;
- xenvif_kick_thread(queue);
+ old = atomic_fetch_or(NETBK_RX_EOI, &queue->eoi_pending);
+ WARN(old & NETBK_RX_EOI, "Interrupt while EOI pending\n");
+
+ if (!xenvif_handle_rx_interrupt(queue)) {
+ atomic_andnot(NETBK_RX_EOI, &queue->eoi_pending);
+ xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
+ }
return IRQ_HANDLED;
}
irqreturn_t xenvif_interrupt(int irq, void *dev_id)
{
- xenvif_tx_interrupt(irq, dev_id);
- xenvif_rx_interrupt(irq, dev_id);
+ struct xenvif_queue *queue = dev_id;
+ int old;
+
+ old = atomic_fetch_or(NETBK_COMMON_EOI, &queue->eoi_pending);
+ WARN(old, "Interrupt while EOI pending\n");
+
+ /* Use bitwise or as we need to call both functions. */
+ if ((!xenvif_handle_tx_interrupt(queue) |
+ !xenvif_handle_rx_interrupt(queue))) {
+ atomic_andnot(NETBK_COMMON_EOI, &queue->eoi_pending);
+ xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
+ }
return IRQ_HANDLED;
}
@@ -605,7 +648,7 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref,
if (req_prod - rsp_prod > RING_SIZE(&vif->ctrl))
goto err_unmap;
- err = bind_interdomain_evtchn_to_irq(vif->domid, evtchn);
+ err = bind_interdomain_evtchn_to_irq_lateeoi(vif->domid, evtchn);
if (err < 0)
goto err_unmap;
@@ -709,7 +752,7 @@ int xenvif_connect_data(struct xenvif_queue *queue,
if (tx_evtchn == rx_evtchn) {
/* feature-split-event-channels == 0 */
- err = bind_interdomain_evtchn_to_irqhandler(
+ err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
queue->vif->domid, tx_evtchn, xenvif_interrupt, 0,
queue->name, queue);
if (err < 0)
@@ -720,7 +763,7 @@ int xenvif_connect_data(struct xenvif_queue *queue,
/* feature-split-event-channels == 1 */
snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
"%s-tx", queue->name);
- err = bind_interdomain_evtchn_to_irqhandler(
+ err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
queue->vif->domid, tx_evtchn, xenvif_tx_interrupt, 0,
queue->tx_irq_name, queue);
if (err < 0)
@@ -730,7 +773,7 @@ int xenvif_connect_data(struct xenvif_queue *queue,
snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
"%s-rx", queue->name);
- err = bind_interdomain_evtchn_to_irqhandler(
+ err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
queue->vif->domid, rx_evtchn, xenvif_rx_interrupt, 0,
queue->rx_irq_name, queue);
if (err < 0)
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 6dfca7265644..bc3421d14576 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -169,6 +169,10 @@ void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue)
if (more_to_do)
napi_schedule(&queue->napi);
+ else if (atomic_fetch_andnot(NETBK_TX_EOI | NETBK_COMMON_EOI,
+ &queue->eoi_pending) &
+ (NETBK_TX_EOI | NETBK_COMMON_EOI))
+ xen_irq_lateeoi(queue->tx_irq, 0);
}
static void tx_add_credit(struct xenvif_queue *queue)
@@ -1643,9 +1647,14 @@ static bool xenvif_ctrl_work_todo(struct xenvif *vif)
irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data)
{
struct xenvif *vif = data;
+ unsigned int eoi_flag = XEN_EOI_FLAG_SPURIOUS;
- while (xenvif_ctrl_work_todo(vif))
+ while (xenvif_ctrl_work_todo(vif)) {
xenvif_ctrl_action(vif);
+ eoi_flag = 0;
+ }
+
+ xen_irq_lateeoi(irq, eoi_flag);
return IRQ_HANDLED;
}
diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
index ac034f69a170..b8febe1d1bfd 100644
--- a/drivers/net/xen-netback/rx.c
+++ b/drivers/net/xen-netback/rx.c
@@ -503,13 +503,13 @@ static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
return queue->stalled && prod - cons >= 1;
}
-static bool xenvif_have_rx_work(struct xenvif_queue *queue)
+bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread)
{
return xenvif_rx_ring_slots_available(queue) ||
(queue->vif->stall_timeout &&
(xenvif_rx_queue_stalled(queue) ||
xenvif_rx_queue_ready(queue))) ||
- kthread_should_stop() ||
+ (test_kthread && kthread_should_stop()) ||
queue->vif->disabled;
}
@@ -540,15 +540,20 @@ static void xenvif_wait_for_rx_work(struct xenvif_queue *queue)
{
DEFINE_WAIT(wait);
- if (xenvif_have_rx_work(queue))
+ if (xenvif_have_rx_work(queue, true))
return;
for (;;) {
long ret;
prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE);
- if (xenvif_have_rx_work(queue))
+ if (xenvif_have_rx_work(queue, true))
break;
+ if (atomic_fetch_andnot(NETBK_RX_EOI | NETBK_COMMON_EOI,
+ &queue->eoi_pending) &
+ (NETBK_RX_EOI | NETBK_COMMON_EOI))
+ xen_irq_lateeoi(queue->rx_irq, 0);
+
ret = schedule_timeout(xenvif_rx_queue_timeout(queue));
if (!ret)
break;
diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig
index 0a1fb5c74f83..d55b3727e00e 100644
--- a/drivers/power/reset/Kconfig
+++ b/drivers/power/reset/Kconfig
@@ -129,10 +129,10 @@ config POWER_RESET_QCOM_PON
config POWER_RESET_OCELOT_RESET
bool "Microsemi Ocelot reset driver"
- depends on MSCC_OCELOT || COMPILE_TEST
+ depends on MSCC_OCELOT || ARCH_SPARX5 || COMPILE_TEST
select MFD_SYSCON
help
- This driver supports restart for Microsemi Ocelot SoC.
+ This driver supports restart for Microsemi Ocelot SoC and similar.
config POWER_RESET_OXNAS
bool "OXNAS SoC restart driver"
diff --git a/drivers/power/reset/ocelot-reset.c b/drivers/power/reset/ocelot-reset.c
index 419952c61fd0..f74e1dbb4ba3 100644
--- a/drivers/power/reset/ocelot-reset.c
+++ b/drivers/power/reset/ocelot-reset.c
@@ -15,15 +15,20 @@
#include <linux/reboot.h>
#include <linux/regmap.h>
+struct reset_props {
+ const char *syscon;
+ u32 protect_reg;
+ u32 vcore_protect;
+ u32 if_si_owner_bit;
+};
+
struct ocelot_reset_context {
void __iomem *base;
struct regmap *cpu_ctrl;
+ const struct reset_props *props;
struct notifier_block restart_handler;
};
-#define ICPU_CFG_CPU_SYSTEM_CTRL_RESET 0x20
-#define CORE_RST_PROTECT BIT(2)
-
#define SOFT_CHIP_RST BIT(0)
#define ICPU_CFG_CPU_SYSTEM_CTRL_GENERAL_CTRL 0x24
@@ -31,7 +36,6 @@ struct ocelot_reset_context {
#define IF_SI_OWNER_SISL 0
#define IF_SI_OWNER_SIBM 1
#define IF_SI_OWNER_SIMC 2
-#define IF_SI_OWNER_OFFSET 4
static int ocelot_restart_handle(struct notifier_block *this,
unsigned long mode, void *cmd)
@@ -39,15 +43,18 @@ static int ocelot_restart_handle(struct notifier_block *this,
struct ocelot_reset_context *ctx = container_of(this, struct
ocelot_reset_context,
restart_handler);
+ u32 if_si_owner_bit = ctx->props->if_si_owner_bit;
/* Make sure the core is not protected from reset */
- regmap_update_bits(ctx->cpu_ctrl, ICPU_CFG_CPU_SYSTEM_CTRL_RESET,
- CORE_RST_PROTECT, 0);
+ regmap_update_bits(ctx->cpu_ctrl, ctx->props->protect_reg,
+ ctx->props->vcore_protect, 0);
/* Make the SI back to boot mode */
regmap_update_bits(ctx->cpu_ctrl, ICPU_CFG_CPU_SYSTEM_CTRL_GENERAL_CTRL,
- IF_SI_OWNER_MASK << IF_SI_OWNER_OFFSET,
- IF_SI_OWNER_SIBM << IF_SI_OWNER_OFFSET);
+ IF_SI_OWNER_MASK << if_si_owner_bit,
+ IF_SI_OWNER_SIBM << if_si_owner_bit);
+
+ pr_emerg("Resetting SoC\n");
writel(SOFT_CHIP_RST, ctx->base);
@@ -72,9 +79,13 @@ static int ocelot_reset_probe(struct platform_device *pdev)
if (IS_ERR(ctx->base))
return PTR_ERR(ctx->base);
- ctx->cpu_ctrl = syscon_regmap_lookup_by_compatible("mscc,ocelot-cpu-syscon");
- if (IS_ERR(ctx->cpu_ctrl))
+ ctx->props = device_get_match_data(dev);
+
+ ctx->cpu_ctrl = syscon_regmap_lookup_by_compatible(ctx->props->syscon);
+ if (IS_ERR(ctx->cpu_ctrl)) {
+ dev_err(dev, "No syscon map: %s\n", ctx->props->syscon);
return PTR_ERR(ctx->cpu_ctrl);
+ }
ctx->restart_handler.notifier_call = ocelot_restart_handle;
ctx->restart_handler.priority = 192;
@@ -85,9 +96,29 @@ static int ocelot_reset_probe(struct platform_device *pdev)
return err;
}
+static const struct reset_props reset_props_ocelot = {
+ .syscon = "mscc,ocelot-cpu-syscon",
+ .protect_reg = 0x20,
+ .vcore_protect = BIT(2),
+ .if_si_owner_bit = 4,
+};
+
+static const struct reset_props reset_props_sparx5 = {
+ .syscon = "microchip,sparx5-cpu-syscon",
+ .protect_reg = 0x84,
+ .vcore_protect = BIT(10),
+ .if_si_owner_bit = 6,
+};
+
static const struct of_device_id ocelot_reset_of_match[] = {
- { .compatible = "mscc,ocelot-chip-reset" },
- {}
+ {
+ .compatible = "mscc,ocelot-chip-reset",
+ .data = &reset_props_ocelot
+ }, {
+ .compatible = "microchip,sparx5-chip-reset",
+ .data = &reset_props_sparx5
+ },
+ { /*sentinel*/ }
};
static struct platform_driver ocelot_reset_driver = {
diff --git a/drivers/power/supply/Kconfig b/drivers/power/supply/Kconfig
index faf2830aa152..eec646c568b7 100644
--- a/drivers/power/supply/Kconfig
+++ b/drivers/power/supply/Kconfig
@@ -164,7 +164,7 @@ config BATTERY_DS2782
config BATTERY_LEGO_EV3
tristate "LEGO MINDSTORMS EV3 battery"
- depends on OF && IIO && GPIOLIB
+ depends on OF && IIO && GPIOLIB && (ARCH_DAVINCI_DA850 || COMPILE_TEST)
help
Say Y here to enable support for the LEGO MINDSTORMS EV3 battery.
@@ -367,10 +367,15 @@ config AXP288_FUEL_GAUGE
config BATTERY_MAX17040
tristate "Maxim MAX17040 Fuel Gauge"
depends on I2C
+ select REGMAP_I2C
help
- MAX17040 is fuel-gauge systems for lithium-ion (Li+) batteries
- in handheld and portable equipment. The MAX17040 is configured
- to operate with a single lithium cell
+ Maxim models with ModelGauge are fuel-gauge systems for lithium-ion
+ (Li+) batteries in handheld and portable equipment, including
+ max17040, max17041, max17043, max17044, max17048, max17049, max17058,
+ max17059. It is also included in some batteries like max77836.
+
+ Driver supports reporting SOC (State of Charge, i.e capacity),
+ voltage and configurable low-SOC wakeup interrupt.
config BATTERY_MAX17042
tristate "Maxim MAX17042/17047/17050/8997/8966 Fuel Gauge"
@@ -631,13 +636,22 @@ config CHARGER_BQ25890
help
Say Y to enable support for the TI BQ25890 battery charger.
+config CHARGER_BQ25980
+ tristate "TI BQ25980 battery charger driver"
+ depends on I2C
+ depends on GPIOLIB || COMPILE_TEST
+ select REGMAP_I2C
+ help
+ Say Y to enable support for the TI BQ25980, BQ25975 and BQ25960
+ series of fast battery chargers.
+
config CHARGER_SMB347
- tristate "Summit Microelectronics SMB347 Battery Charger"
+ tristate "Summit Microelectronics SMB3XX Battery Charger"
depends on I2C
select REGMAP_I2C
help
- Say Y to include support for Summit Microelectronics SMB347
- Battery Charger.
+ Say Y to include support for Summit Microelectronics SMB345,
+ SMB347 or SMB358 Battery Charger.
config CHARGER_TPS65090
tristate "TPS65090 battery charger driver"
@@ -752,4 +766,12 @@ config CHARGER_WILCO
information can be found in
Documentation/ABI/testing/sysfs-class-power-wilco
+config RN5T618_POWER
+ tristate "RN5T618 charger/fuel gauge support"
+ depends on MFD_RN5T618
+ help
+ Say Y here to have support for RN5T618 PMIC family fuel gauge and charger.
+ This driver can also be built as a module. If so, the module will be
+ called rn5t618_power.
+
endif # POWER_SUPPLY
diff --git a/drivers/power/supply/Makefile b/drivers/power/supply/Makefile
index b3c694a65114..dd4b86318cd9 100644
--- a/drivers/power/supply/Makefile
+++ b/drivers/power/supply/Makefile
@@ -84,6 +84,7 @@ obj-$(CONFIG_CHARGER_BQ24257) += bq24257_charger.o
obj-$(CONFIG_CHARGER_BQ24735) += bq24735-charger.o
obj-$(CONFIG_CHARGER_BQ2515X) += bq2515x_charger.o
obj-$(CONFIG_CHARGER_BQ25890) += bq25890_charger.o
+obj-$(CONFIG_CHARGER_BQ25980) += bq25980_charger.o
obj-$(CONFIG_CHARGER_SMB347) += smb347-charger.o
obj-$(CONFIG_CHARGER_TPS65090) += tps65090-charger.o
obj-$(CONFIG_CHARGER_TPS65217) += tps65217_charger.o
@@ -96,3 +97,4 @@ obj-$(CONFIG_CHARGER_UCS1002) += ucs1002_power.o
obj-$(CONFIG_CHARGER_BD70528) += bd70528-charger.o
obj-$(CONFIG_CHARGER_BD99954) += bd99954-charger.o
obj-$(CONFIG_CHARGER_WILCO) += wilco-charger.o
+obj-$(CONFIG_RN5T618_POWER) += rn5t618_power.o
diff --git a/drivers/power/supply/ab8500_fg.c b/drivers/power/supply/ab8500_fg.c
index 7eec415c82a3..592a73d4dde6 100644
--- a/drivers/power/supply/ab8500_fg.c
+++ b/drivers/power/supply/ab8500_fg.c
@@ -653,7 +653,7 @@ int ab8500_fg_inst_curr_finalize(struct ab8500_fg *di, int *res)
/*
* negative value for Discharging
- * convert 2's compliment into decimal
+ * convert 2's complement into decimal
*/
if (high & 0x10)
val = (low | (high << 8) | 0xFFFFE000);
@@ -781,7 +781,7 @@ static void ab8500_fg_acc_cur_work(struct work_struct *work)
if (ret < 0)
goto exit;
- /* Check for sign bit in case of negative value, 2's compliment */
+ /* Check for sign bit in case of negative value, 2's complement */
if (high & 0x10)
val = (low | (med << 8) | (high << 16) | 0xFFE00000);
else
diff --git a/drivers/power/supply/bq24257_charger.c b/drivers/power/supply/bq24257_charger.c
index 8e60cb0f3c3f..96cb3290bcaa 100644
--- a/drivers/power/supply/bq24257_charger.c
+++ b/drivers/power/supply/bq24257_charger.c
@@ -1152,6 +1152,7 @@ static const struct of_device_id bq24257_of_match[] = {
};
MODULE_DEVICE_TABLE(of, bq24257_of_match);
+#ifdef CONFIG_ACPI
static const struct acpi_device_id bq24257_acpi_match[] = {
{ "BQ242500", BQ24250 },
{ "BQ242510", BQ24251 },
@@ -1159,6 +1160,7 @@ static const struct acpi_device_id bq24257_acpi_match[] = {
{},
};
MODULE_DEVICE_TABLE(acpi, bq24257_acpi_match);
+#endif
static struct i2c_driver bq24257_driver = {
.driver = {
diff --git a/drivers/power/supply/bq2515x_charger.c b/drivers/power/supply/bq2515x_charger.c
index 36b0c8c98d40..374b112f712a 100644
--- a/drivers/power/supply/bq2515x_charger.c
+++ b/drivers/power/supply/bq2515x_charger.c
@@ -168,7 +168,7 @@ enum bq2515x_id {
* @device_id: value of device_id
* @mains_online: boolean value indicating power supply online
*
- * @bq2515x_init_data init_data: charger initialization data structure
+ * @init_data: charger initialization data structure
*/
struct bq2515x_device {
struct power_supply *mains;
@@ -188,7 +188,7 @@ struct bq2515x_device {
struct bq2515x_init_data init_data;
};
-static struct reg_default bq25150_reg_defaults[] = {
+static const struct reg_default bq25150_reg_defaults[] = {
{BQ2515X_FLAG0, 0x0},
{BQ2515X_FLAG1, 0x0},
{BQ2515X_FLAG2, 0x0},
@@ -227,7 +227,7 @@ static struct reg_default bq25150_reg_defaults[] = {
{BQ2515X_DEVICE_ID, 0x20},
};
-static struct reg_default bq25155_reg_defaults[] = {
+static const struct reg_default bq25155_reg_defaults[] = {
{BQ2515X_FLAG0, 0x0},
{BQ2515X_FLAG1, 0x0},
{BQ2515X_FLAG2, 0x0},
@@ -886,14 +886,14 @@ static int bq2515x_battery_get_property(struct power_supply *psy,
return 0;
}
-static enum power_supply_property bq2515x_battery_properties[] = {
+static const enum power_supply_property bq2515x_battery_properties[] = {
POWER_SUPPLY_PROP_VOLTAGE_NOW,
POWER_SUPPLY_PROP_CURRENT_NOW,
POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX,
POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX,
};
-static enum power_supply_property bq2515x_mains_properties[] = {
+static const enum power_supply_property bq2515x_mains_properties[] = {
POWER_SUPPLY_PROP_ONLINE,
POWER_SUPPLY_PROP_STATUS,
POWER_SUPPLY_PROP_HEALTH,
@@ -905,7 +905,7 @@ static enum power_supply_property bq2515x_mains_properties[] = {
POWER_SUPPLY_PROP_PRECHARGE_CURRENT,
};
-static struct power_supply_desc bq2515x_mains_desc = {
+static const struct power_supply_desc bq2515x_mains_desc = {
.name = "bq2515x-mains",
.type = POWER_SUPPLY_TYPE_MAINS,
.get_property = bq2515x_mains_get_property,
@@ -915,7 +915,7 @@ static struct power_supply_desc bq2515x_mains_desc = {
.property_is_writeable = bq2515x_power_supply_property_is_writeable,
};
-static struct power_supply_desc bq2515x_battery_desc = {
+static const struct power_supply_desc bq2515x_battery_desc = {
.name = "bq2515x-battery",
.type = POWER_SUPPLY_TYPE_BATTERY,
.get_property = bq2515x_battery_get_property,
diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c
index 77150667e36b..34c21c51bac1 100644
--- a/drivers/power/supply/bq25890_charger.c
+++ b/drivers/power/supply/bq25890_charger.c
@@ -83,6 +83,8 @@ struct bq25890_init_data {
u8 boostf; /* boost frequency */
u8 ilim_en; /* enable ILIM pin */
u8 treg; /* thermal regulation threshold */
+ u8 rbatcomp; /* IBAT sense resistor value */
+ u8 vclamp; /* IBAT compensation voltage limit */
};
struct bq25890_state {
@@ -258,6 +260,8 @@ enum bq25890_table_ids {
TBL_VREG,
TBL_BOOSTV,
TBL_SYSVMIN,
+ TBL_VBATCOMP,
+ TBL_RBATCOMP,
/* lookup tables */
TBL_TREG,
@@ -299,6 +303,8 @@ static const union {
[TBL_VREG] = { .rt = {3840000, 4608000, 16000} }, /* uV */
[TBL_BOOSTV] = { .rt = {4550000, 5510000, 64000} }, /* uV */
[TBL_SYSVMIN] = { .rt = {3000000, 3700000, 100000} }, /* uV */
+ [TBL_VBATCOMP] ={ .rt = {0, 224000, 32000} }, /* uV */
+ [TBL_RBATCOMP] ={ .rt = {0, 140000, 20000} }, /* uOhm */
/* lookup tables */
[TBL_TREG] = { .lt = {bq25890_treg_tbl, BQ25890_TREG_TBL_SIZE} },
@@ -648,7 +654,9 @@ static int bq25890_hw_init(struct bq25890_device *bq)
{F_BOOSTI, bq->init_data.boosti},
{F_BOOSTF, bq->init_data.boostf},
{F_EN_ILIM, bq->init_data.ilim_en},
- {F_TREG, bq->init_data.treg}
+ {F_TREG, bq->init_data.treg},
+ {F_BATCMP, bq->init_data.rbatcomp},
+ {F_VCLAMP, bq->init_data.vclamp},
};
ret = bq25890_chip_reset(bq);
@@ -859,11 +867,14 @@ static int bq25890_fw_read_u32_props(struct bq25890_device *bq)
{"ti,boost-max-current", false, TBL_BOOSTI, &init->boosti},
/* optional properties */
- {"ti,thermal-regulation-threshold", true, TBL_TREG, &init->treg}
+ {"ti,thermal-regulation-threshold", true, TBL_TREG, &init->treg},
+ {"ti,ibatcomp-micro-ohms", true, TBL_RBATCOMP, &init->rbatcomp},
+ {"ti,ibatcomp-clamp-microvolt", true, TBL_VBATCOMP, &init->vclamp},
};
/* initialize data for optional properties */
init->treg = 3; /* 120 degrees Celsius */
+ init->rbatcomp = init->vclamp = 0; /* IBAT compensation disabled */
for (i = 0; i < ARRAY_SIZE(props); i++) {
ret = device_property_read_u32(bq->dev, props[i].name,
@@ -1073,11 +1084,13 @@ static const struct of_device_id bq25890_of_match[] = {
};
MODULE_DEVICE_TABLE(of, bq25890_of_match);
+#ifdef CONFIG_ACPI
static const struct acpi_device_id bq25890_acpi_match[] = {
{"BQ258900", 0},
{},
};
MODULE_DEVICE_TABLE(acpi, bq25890_acpi_match);
+#endif
static struct i2c_driver bq25890_driver = {
.driver = {
diff --git a/drivers/power/supply/bq25980_charger.c b/drivers/power/supply/bq25980_charger.c
new file mode 100644
index 000000000000..c936f311eb4f
--- /dev/null
+++ b/drivers/power/supply/bq25980_charger.c
@@ -0,0 +1,1314 @@
+// SPDX-License-Identifier: GPL-2.0
+// BQ25980 Battery Charger Driver
+// Copyright (C) 2020 Texas Instruments Incorporated - http://www.ti.com/
+
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/gpio/consumer.h>
+#include <linux/power_supply.h>
+#include <linux/regmap.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/moduleparam.h>
+#include <linux/slab.h>
+
+#include "bq25980_charger.h"
+
+struct bq25980_state {
+ bool dischg;
+ bool ovp;
+ bool ocp;
+ bool wdt;
+ bool tflt;
+ bool online;
+ bool ce;
+ bool hiz;
+ bool bypass;
+
+ u32 vbat_adc;
+ u32 vsys_adc;
+ u32 ibat_adc;
+};
+
+enum bq25980_id {
+ BQ25980,
+ BQ25975,
+ BQ25960,
+};
+
+struct bq25980_chip_info {
+
+ int model_id;
+
+ const struct regmap_config *regmap_config;
+
+ int busocp_def;
+ int busocp_sc_max;
+ int busocp_byp_max;
+ int busocp_sc_min;
+ int busocp_byp_min;
+
+ int busovp_sc_def;
+ int busovp_byp_def;
+ int busovp_sc_step;
+
+ int busovp_sc_offset;
+ int busovp_byp_step;
+ int busovp_byp_offset;
+ int busovp_sc_min;
+ int busovp_sc_max;
+ int busovp_byp_min;
+ int busovp_byp_max;
+
+ int batovp_def;
+ int batovp_max;
+ int batovp_min;
+ int batovp_step;
+ int batovp_offset;
+
+ int batocp_def;
+ int batocp_max;
+};
+
+struct bq25980_init_data {
+ u32 ichg;
+ u32 bypass_ilim;
+ u32 sc_ilim;
+ u32 vreg;
+ u32 iterm;
+ u32 iprechg;
+ u32 bypass_vlim;
+ u32 sc_vlim;
+ u32 ichg_max;
+ u32 vreg_max;
+};
+
+struct bq25980_device {
+ struct i2c_client *client;
+ struct device *dev;
+ struct power_supply *charger;
+ struct power_supply *battery;
+ struct mutex lock;
+ struct regmap *regmap;
+
+ char model_name[I2C_NAME_SIZE];
+
+ struct bq25980_init_data init_data;
+ const struct bq25980_chip_info *chip_info;
+ struct bq25980_state state;
+ int watchdog_timer;
+};
+
+static struct reg_default bq25980_reg_defs[] = {
+ {BQ25980_BATOVP, 0x5A},
+ {BQ25980_BATOVP_ALM, 0x46},
+ {BQ25980_BATOCP, 0x51},
+ {BQ25980_BATOCP_ALM, 0x50},
+ {BQ25980_BATUCP_ALM, 0x28},
+ {BQ25980_CHRGR_CTRL_1, 0x0},
+ {BQ25980_BUSOVP, 0x26},
+ {BQ25980_BUSOVP_ALM, 0x22},
+ {BQ25980_BUSOCP, 0xD},
+ {BQ25980_BUSOCP_ALM, 0xC},
+ {BQ25980_TEMP_CONTROL, 0x30},
+ {BQ25980_TDIE_ALM, 0xC8},
+ {BQ25980_TSBUS_FLT, 0x15},
+ {BQ25980_TSBAT_FLG, 0x15},
+ {BQ25980_VAC_CONTROL, 0x0},
+ {BQ25980_CHRGR_CTRL_2, 0x0},
+ {BQ25980_CHRGR_CTRL_3, 0x20},
+ {BQ25980_CHRGR_CTRL_4, 0x1D},
+ {BQ25980_CHRGR_CTRL_5, 0x18},
+ {BQ25980_STAT1, 0x0},
+ {BQ25980_STAT2, 0x0},
+ {BQ25980_STAT3, 0x0},
+ {BQ25980_STAT4, 0x0},
+ {BQ25980_STAT5, 0x0},
+ {BQ25980_FLAG1, 0x0},
+ {BQ25980_FLAG2, 0x0},
+ {BQ25980_FLAG3, 0x0},
+ {BQ25980_FLAG4, 0x0},
+ {BQ25980_FLAG5, 0x0},
+ {BQ25980_MASK1, 0x0},
+ {BQ25980_MASK2, 0x0},
+ {BQ25980_MASK3, 0x0},
+ {BQ25980_MASK4, 0x0},
+ {BQ25980_MASK5, 0x0},
+ {BQ25980_DEVICE_INFO, 0x8},
+ {BQ25980_ADC_CONTROL1, 0x0},
+ {BQ25980_ADC_CONTROL2, 0x0},
+ {BQ25980_IBUS_ADC_LSB, 0x0},
+ {BQ25980_IBUS_ADC_MSB, 0x0},
+ {BQ25980_VBUS_ADC_LSB, 0x0},
+ {BQ25980_VBUS_ADC_MSB, 0x0},
+ {BQ25980_VAC1_ADC_LSB, 0x0},
+ {BQ25980_VAC2_ADC_LSB, 0x0},
+ {BQ25980_VOUT_ADC_LSB, 0x0},
+ {BQ25980_VBAT_ADC_LSB, 0x0},
+ {BQ25980_IBAT_ADC_MSB, 0x0},
+ {BQ25980_IBAT_ADC_LSB, 0x0},
+ {BQ25980_TSBUS_ADC_LSB, 0x0},
+ {BQ25980_TSBAT_ADC_LSB, 0x0},
+ {BQ25980_TDIE_ADC_LSB, 0x0},
+ {BQ25980_DEGLITCH_TIME, 0x0},
+ {BQ25980_CHRGR_CTRL_6, 0x0},
+};
+
+static struct reg_default bq25975_reg_defs[] = {
+ {BQ25980_BATOVP, 0x5A},
+ {BQ25980_BATOVP_ALM, 0x46},
+ {BQ25980_BATOCP, 0x51},
+ {BQ25980_BATOCP_ALM, 0x50},
+ {BQ25980_BATUCP_ALM, 0x28},
+ {BQ25980_CHRGR_CTRL_1, 0x0},
+ {BQ25980_BUSOVP, 0x26},
+ {BQ25980_BUSOVP_ALM, 0x22},
+ {BQ25980_BUSOCP, 0xD},
+ {BQ25980_BUSOCP_ALM, 0xC},
+ {BQ25980_TEMP_CONTROL, 0x30},
+ {BQ25980_TDIE_ALM, 0xC8},
+ {BQ25980_TSBUS_FLT, 0x15},
+ {BQ25980_TSBAT_FLG, 0x15},
+ {BQ25980_VAC_CONTROL, 0x0},
+ {BQ25980_CHRGR_CTRL_2, 0x0},
+ {BQ25980_CHRGR_CTRL_3, 0x20},
+ {BQ25980_CHRGR_CTRL_4, 0x1D},
+ {BQ25980_CHRGR_CTRL_5, 0x18},
+ {BQ25980_STAT1, 0x0},
+ {BQ25980_STAT2, 0x0},
+ {BQ25980_STAT3, 0x0},
+ {BQ25980_STAT4, 0x0},
+ {BQ25980_STAT5, 0x0},
+ {BQ25980_FLAG1, 0x0},
+ {BQ25980_FLAG2, 0x0},
+ {BQ25980_FLAG3, 0x0},
+ {BQ25980_FLAG4, 0x0},
+ {BQ25980_FLAG5, 0x0},
+ {BQ25980_MASK1, 0x0},
+ {BQ25980_MASK2, 0x0},
+ {BQ25980_MASK3, 0x0},
+ {BQ25980_MASK4, 0x0},
+ {BQ25980_MASK5, 0x0},
+ {BQ25980_DEVICE_INFO, 0x8},
+ {BQ25980_ADC_CONTROL1, 0x0},
+ {BQ25980_ADC_CONTROL2, 0x0},
+ {BQ25980_IBUS_ADC_LSB, 0x0},
+ {BQ25980_IBUS_ADC_MSB, 0x0},
+ {BQ25980_VBUS_ADC_LSB, 0x0},
+ {BQ25980_VBUS_ADC_MSB, 0x0},
+ {BQ25980_VAC1_ADC_LSB, 0x0},
+ {BQ25980_VAC2_ADC_LSB, 0x0},
+ {BQ25980_VOUT_ADC_LSB, 0x0},
+ {BQ25980_VBAT_ADC_LSB, 0x0},
+ {BQ25980_IBAT_ADC_MSB, 0x0},
+ {BQ25980_IBAT_ADC_LSB, 0x0},
+ {BQ25980_TSBUS_ADC_LSB, 0x0},
+ {BQ25980_TSBAT_ADC_LSB, 0x0},
+ {BQ25980_TDIE_ADC_LSB, 0x0},
+ {BQ25980_DEGLITCH_TIME, 0x0},
+ {BQ25980_CHRGR_CTRL_6, 0x0},
+};
+
+static struct reg_default bq25960_reg_defs[] = {
+ {BQ25980_BATOVP, 0x5A},
+ {BQ25980_BATOVP_ALM, 0x46},
+ {BQ25980_BATOCP, 0x51},
+ {BQ25980_BATOCP_ALM, 0x50},
+ {BQ25980_BATUCP_ALM, 0x28},
+ {BQ25980_CHRGR_CTRL_1, 0x0},
+ {BQ25980_BUSOVP, 0x26},
+ {BQ25980_BUSOVP_ALM, 0x22},
+ {BQ25980_BUSOCP, 0xD},
+ {BQ25980_BUSOCP_ALM, 0xC},
+ {BQ25980_TEMP_CONTROL, 0x30},
+ {BQ25980_TDIE_ALM, 0xC8},
+ {BQ25980_TSBUS_FLT, 0x15},
+ {BQ25980_TSBAT_FLG, 0x15},
+ {BQ25980_VAC_CONTROL, 0x0},
+ {BQ25980_CHRGR_CTRL_2, 0x0},
+ {BQ25980_CHRGR_CTRL_3, 0x20},
+ {BQ25980_CHRGR_CTRL_4, 0x1D},
+ {BQ25980_CHRGR_CTRL_5, 0x18},
+ {BQ25980_STAT1, 0x0},
+ {BQ25980_STAT2, 0x0},
+ {BQ25980_STAT3, 0x0},
+ {BQ25980_STAT4, 0x0},
+ {BQ25980_STAT5, 0x0},
+ {BQ25980_FLAG1, 0x0},
+ {BQ25980_FLAG2, 0x0},
+ {BQ25980_FLAG3, 0x0},
+ {BQ25980_FLAG4, 0x0},
+ {BQ25980_FLAG5, 0x0},
+ {BQ25980_MASK1, 0x0},
+ {BQ25980_MASK2, 0x0},
+ {BQ25980_MASK3, 0x0},
+ {BQ25980_MASK4, 0x0},
+ {BQ25980_MASK5, 0x0},
+ {BQ25980_DEVICE_INFO, 0x8},
+ {BQ25980_ADC_CONTROL1, 0x0},
+ {BQ25980_ADC_CONTROL2, 0x0},
+ {BQ25980_IBUS_ADC_LSB, 0x0},
+ {BQ25980_IBUS_ADC_MSB, 0x0},
+ {BQ25980_VBUS_ADC_LSB, 0x0},
+ {BQ25980_VBUS_ADC_MSB, 0x0},
+ {BQ25980_VAC1_ADC_LSB, 0x0},
+ {BQ25980_VAC2_ADC_LSB, 0x0},
+ {BQ25980_VOUT_ADC_LSB, 0x0},
+ {BQ25980_VBAT_ADC_LSB, 0x0},
+ {BQ25980_IBAT_ADC_MSB, 0x0},
+ {BQ25980_IBAT_ADC_LSB, 0x0},
+ {BQ25980_TSBUS_ADC_LSB, 0x0},
+ {BQ25980_TSBAT_ADC_LSB, 0x0},
+ {BQ25980_TDIE_ADC_LSB, 0x0},
+ {BQ25980_DEGLITCH_TIME, 0x0},
+ {BQ25980_CHRGR_CTRL_6, 0x0},
+};
+
+static int bq25980_watchdog_time[BQ25980_NUM_WD_VAL] = {5000, 10000, 50000,
+ 300000};
+
+static int bq25980_get_input_curr_lim(struct bq25980_device *bq)
+{
+ unsigned int busocp_reg_code;
+ int ret;
+
+ ret = regmap_read(bq->regmap, BQ25980_BUSOCP, &busocp_reg_code);
+ if (ret)
+ return ret;
+
+ return (busocp_reg_code * BQ25980_BUSOCP_STEP_uA) + BQ25980_BUSOCP_OFFSET_uA;
+}
+
+static int bq25980_set_hiz(struct bq25980_device *bq, int setting)
+{
+ return regmap_update_bits(bq->regmap, BQ25980_CHRGR_CTRL_2,
+ BQ25980_EN_HIZ, setting);
+}
+
+static int bq25980_set_input_curr_lim(struct bq25980_device *bq, int busocp)
+{
+ unsigned int busocp_reg_code;
+ int ret;
+
+ if (!busocp)
+ return bq25980_set_hiz(bq, BQ25980_ENABLE_HIZ);
+
+ bq25980_set_hiz(bq, BQ25980_DISABLE_HIZ);
+
+ if (busocp < BQ25980_BUSOCP_MIN_uA)
+ busocp = BQ25980_BUSOCP_MIN_uA;
+
+ if (bq->state.bypass)
+ busocp = min(busocp, bq->chip_info->busocp_sc_max);
+ else
+ busocp = min(busocp, bq->chip_info->busocp_byp_max);
+
+ busocp_reg_code = (busocp - BQ25980_BUSOCP_OFFSET_uA)
+ / BQ25980_BUSOCP_STEP_uA;
+
+ ret = regmap_write(bq->regmap, BQ25980_BUSOCP, busocp_reg_code);
+ if (ret)
+ return ret;
+
+ return regmap_write(bq->regmap, BQ25980_BUSOCP_ALM, busocp_reg_code);
+}
+
+static int bq25980_get_input_volt_lim(struct bq25980_device *bq)
+{
+ unsigned int busovp_reg_code;
+ unsigned int busovp_offset;
+ unsigned int busovp_step;
+ int ret;
+
+ if (bq->state.bypass) {
+ busovp_step = bq->chip_info->busovp_byp_step;
+ busovp_offset = bq->chip_info->busovp_byp_offset;
+ } else {
+ busovp_step = bq->chip_info->busovp_sc_step;
+ busovp_offset = bq->chip_info->busovp_sc_offset;
+ }
+
+ ret = regmap_read(bq->regmap, BQ25980_BUSOVP, &busovp_reg_code);
+ if (ret)
+ return ret;
+
+ return (busovp_reg_code * busovp_step) + busovp_offset;
+}
+
+static int bq25980_set_input_volt_lim(struct bq25980_device *bq, int busovp)
+{
+ unsigned int busovp_reg_code;
+ unsigned int busovp_step;
+ unsigned int busovp_offset;
+ int ret;
+
+ if (bq->state.bypass) {
+ busovp_step = bq->chip_info->busovp_byp_step;
+ busovp_offset = bq->chip_info->busovp_byp_offset;
+ if (busovp > bq->chip_info->busovp_byp_max)
+ busovp = bq->chip_info->busovp_byp_max;
+ else if (busovp < bq->chip_info->busovp_byp_min)
+ busovp = bq->chip_info->busovp_byp_min;
+ } else {
+ busovp_step = bq->chip_info->busovp_sc_step;
+ busovp_offset = bq->chip_info->busovp_sc_offset;
+ if (busovp > bq->chip_info->busovp_sc_max)
+ busovp = bq->chip_info->busovp_sc_max;
+ else if (busovp < bq->chip_info->busovp_sc_min)
+ busovp = bq->chip_info->busovp_sc_min;
+ }
+
+ busovp_reg_code = (busovp - busovp_offset) / busovp_step;
+
+ ret = regmap_write(bq->regmap, BQ25980_BUSOVP, busovp_reg_code);
+ if (ret)
+ return ret;
+
+ return regmap_write(bq->regmap, BQ25980_BUSOVP_ALM, busovp_reg_code);
+}
+
+static int bq25980_get_const_charge_curr(struct bq25980_device *bq)
+{
+ unsigned int batocp_reg_code;
+ int ret;
+
+ ret = regmap_read(bq->regmap, BQ25980_BATOCP, &batocp_reg_code);
+ if (ret)
+ return ret;
+
+ return (batocp_reg_code & BQ25980_BATOCP_MASK) *
+ BQ25980_BATOCP_STEP_uA;
+}
+
+static int bq25980_set_const_charge_curr(struct bq25980_device *bq, int batocp)
+{
+ unsigned int batocp_reg_code;
+ int ret;
+
+ batocp = max(batocp, BQ25980_BATOCP_MIN_uA);
+ batocp = min(batocp, bq->chip_info->batocp_max);
+
+ batocp_reg_code = batocp / BQ25980_BATOCP_STEP_uA;
+
+ ret = regmap_update_bits(bq->regmap, BQ25980_BATOCP,
+ BQ25980_BATOCP_MASK, batocp_reg_code);
+ if (ret)
+ return ret;
+
+ return regmap_update_bits(bq->regmap, BQ25980_BATOCP_ALM,
+ BQ25980_BATOCP_MASK, batocp_reg_code);
+}
+
+static int bq25980_get_const_charge_volt(struct bq25980_device *bq)
+{
+ unsigned int batovp_reg_code;
+ int ret;
+
+ ret = regmap_read(bq->regmap, BQ25980_BATOVP, &batovp_reg_code);
+ if (ret)
+ return ret;
+
+ return ((batovp_reg_code * bq->chip_info->batovp_step) +
+ bq->chip_info->batovp_offset);
+}
+
+static int bq25980_set_const_charge_volt(struct bq25980_device *bq, int batovp)
+{
+ unsigned int batovp_reg_code;
+ int ret;
+
+ if (batovp < bq->chip_info->batovp_min)
+ batovp = bq->chip_info->batovp_min;
+
+ if (batovp > bq->chip_info->batovp_max)
+ batovp = bq->chip_info->batovp_max;
+
+ batovp_reg_code = (batovp - bq->chip_info->batovp_offset) /
+ bq->chip_info->batovp_step;
+
+ ret = regmap_write(bq->regmap, BQ25980_BATOVP, batovp_reg_code);
+ if (ret)
+ return ret;
+
+ return regmap_write(bq->regmap, BQ25980_BATOVP_ALM, batovp_reg_code);
+}
+
+static int bq25980_set_bypass(struct bq25980_device *bq, bool en_bypass)
+{
+ int ret;
+
+ if (en_bypass)
+ ret = regmap_update_bits(bq->regmap, BQ25980_CHRGR_CTRL_2,
+ BQ25980_EN_BYPASS, BQ25980_EN_BYPASS);
+ else
+ ret = regmap_update_bits(bq->regmap, BQ25980_CHRGR_CTRL_2,
+ BQ25980_EN_BYPASS, en_bypass);
+ if (ret)
+ return ret;
+
+ bq->state.bypass = en_bypass;
+
+ return bq->state.bypass;
+}
+
+static int bq25980_set_chg_en(struct bq25980_device *bq, bool en_chg)
+{
+ int ret;
+
+ if (en_chg)
+ ret = regmap_update_bits(bq->regmap, BQ25980_CHRGR_CTRL_2,
+ BQ25980_CHG_EN, BQ25980_CHG_EN);
+ else
+ ret = regmap_update_bits(bq->regmap, BQ25980_CHRGR_CTRL_2,
+ BQ25980_CHG_EN, en_chg);
+ if (ret)
+ return ret;
+
+ bq->state.ce = en_chg;
+
+ return 0;
+}
+
+static int bq25980_get_adc_ibus(struct bq25980_device *bq)
+{
+ int ibus_adc_lsb, ibus_adc_msb;
+ u16 ibus_adc;
+ int ret;
+
+ ret = regmap_read(bq->regmap, BQ25980_IBUS_ADC_MSB, &ibus_adc_msb);
+ if (ret)
+ return ret;
+
+ ret = regmap_read(bq->regmap, BQ25980_IBUS_ADC_LSB, &ibus_adc_lsb);
+ if (ret)
+ return ret;
+
+ ibus_adc = (ibus_adc_msb << 8) | ibus_adc_lsb;
+
+ if (ibus_adc_msb & BQ25980_ADC_POLARITY_BIT)
+ return ((ibus_adc ^ 0xffff) + 1) * BQ25980_ADC_CURR_STEP_uA;
+
+ return ibus_adc * BQ25980_ADC_CURR_STEP_uA;
+}
+
+static int bq25980_get_adc_vbus(struct bq25980_device *bq)
+{
+ int vbus_adc_lsb, vbus_adc_msb;
+ u16 vbus_adc;
+ int ret;
+
+ ret = regmap_read(bq->regmap, BQ25980_VBUS_ADC_MSB, &vbus_adc_msb);
+ if (ret)
+ return ret;
+
+ ret = regmap_read(bq->regmap, BQ25980_VBUS_ADC_LSB, &vbus_adc_lsb);
+ if (ret)
+ return ret;
+
+ vbus_adc = (vbus_adc_msb << 8) | vbus_adc_lsb;
+
+ return vbus_adc * BQ25980_ADC_VOLT_STEP_uV;
+}
+
+static int bq25980_get_ibat_adc(struct bq25980_device *bq)
+{
+ int ret;
+ int ibat_adc_lsb, ibat_adc_msb;
+ int ibat_adc;
+
+ ret = regmap_read(bq->regmap, BQ25980_IBAT_ADC_MSB, &ibat_adc_msb);
+ if (ret)
+ return ret;
+
+ ret = regmap_read(bq->regmap, BQ25980_IBAT_ADC_LSB, &ibat_adc_lsb);
+ if (ret)
+ return ret;
+
+ ibat_adc = (ibat_adc_msb << 8) | ibat_adc_lsb;
+
+ if (ibat_adc_msb & BQ25980_ADC_POLARITY_BIT)
+ return ((ibat_adc ^ 0xffff) + 1) * BQ25980_ADC_CURR_STEP_uA;
+
+ return ibat_adc * BQ25980_ADC_CURR_STEP_uA;
+}
+
+static int bq25980_get_adc_vbat(struct bq25980_device *bq)
+{
+ int vsys_adc_lsb, vsys_adc_msb;
+ u16 vsys_adc;
+ int ret;
+
+ ret = regmap_read(bq->regmap, BQ25980_VBAT_ADC_MSB, &vsys_adc_msb);
+ if (ret)
+ return ret;
+
+ ret = regmap_read(bq->regmap, BQ25980_VBAT_ADC_LSB, &vsys_adc_lsb);
+ if (ret)
+ return ret;
+
+ vsys_adc = (vsys_adc_msb << 8) | vsys_adc_lsb;
+
+ return vsys_adc * BQ25980_ADC_VOLT_STEP_uV;
+}
+
+static int bq25980_get_state(struct bq25980_device *bq,
+ struct bq25980_state *state)
+{
+ unsigned int chg_ctrl_2;
+ unsigned int stat1;
+ unsigned int stat2;
+ unsigned int stat3;
+ unsigned int stat4;
+ unsigned int ibat_adc_msb;
+ int ret;
+
+ ret = regmap_read(bq->regmap, BQ25980_STAT1, &stat1);
+ if (ret)
+ return ret;
+
+ ret = regmap_read(bq->regmap, BQ25980_STAT2, &stat2);
+ if (ret)
+ return ret;
+
+ ret = regmap_read(bq->regmap, BQ25980_STAT3, &stat3);
+ if (ret)
+ return ret;
+
+ ret = regmap_read(bq->regmap, BQ25980_STAT4, &stat4);
+ if (ret)
+ return ret;
+
+ ret = regmap_read(bq->regmap, BQ25980_CHRGR_CTRL_2, &chg_ctrl_2);
+ if (ret)
+ return ret;
+
+ ret = regmap_read(bq->regmap, BQ25980_IBAT_ADC_MSB, &ibat_adc_msb);
+ if (ret)
+ return ret;
+
+ state->dischg = ibat_adc_msb & BQ25980_ADC_POLARITY_BIT;
+ state->ovp = (stat1 & BQ25980_STAT1_OVP_MASK) |
+ (stat3 & BQ25980_STAT3_OVP_MASK);
+ state->ocp = (stat1 & BQ25980_STAT1_OCP_MASK) |
+ (stat2 & BQ25980_STAT2_OCP_MASK);
+ state->tflt = stat4 & BQ25980_STAT4_TFLT_MASK;
+ state->wdt = stat4 & BQ25980_WD_STAT;
+ state->online = stat3 & BQ25980_PRESENT_MASK;
+ state->ce = chg_ctrl_2 & BQ25980_CHG_EN;
+ state->hiz = chg_ctrl_2 & BQ25980_EN_HIZ;
+ state->bypass = chg_ctrl_2 & BQ25980_EN_BYPASS;
+
+ return 0;
+}
+
+static int bq25980_set_battery_property(struct power_supply *psy,
+ enum power_supply_property psp,
+ const union power_supply_propval *val)
+{
+ struct bq25980_device *bq = power_supply_get_drvdata(psy);
+ int ret = 0;
+
+ switch (psp) {
+ case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT:
+ ret = bq25980_set_const_charge_curr(bq, val->intval);
+ if (ret)
+ return ret;
+ break;
+
+ case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE:
+ ret = bq25980_set_const_charge_volt(bq, val->intval);
+ if (ret)
+ return ret;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+static int bq25980_get_battery_property(struct power_supply *psy,
+ enum power_supply_property psp,
+ union power_supply_propval *val)
+{
+ struct bq25980_device *bq = power_supply_get_drvdata(psy);
+ int ret = 0;
+
+ switch (psp) {
+ case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX:
+ val->intval = bq->init_data.ichg_max;
+ break;
+
+ case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX:
+ val->intval = bq->init_data.vreg_max;
+ break;
+
+ case POWER_SUPPLY_PROP_CURRENT_NOW:
+ ret = bq25980_get_ibat_adc(bq);
+ val->intval = ret;
+ break;
+
+ case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+ ret = bq25980_get_adc_vbat(bq);
+ if (ret < 0)
+ return ret;
+
+ val->intval = ret;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+static int bq25980_set_charger_property(struct power_supply *psy,
+ enum power_supply_property prop,
+ const union power_supply_propval *val)
+{
+ struct bq25980_device *bq = power_supply_get_drvdata(psy);
+ int ret = -EINVAL;
+
+ switch (prop) {
+ case POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT:
+ ret = bq25980_set_input_curr_lim(bq, val->intval);
+ if (ret)
+ return ret;
+ break;
+
+ case POWER_SUPPLY_PROP_INPUT_VOLTAGE_LIMIT:
+ ret = bq25980_set_input_volt_lim(bq, val->intval);
+ if (ret)
+ return ret;
+ break;
+
+ case POWER_SUPPLY_PROP_CHARGE_TYPE:
+ ret = bq25980_set_bypass(bq, val->intval);
+ if (ret)
+ return ret;
+ break;
+
+ case POWER_SUPPLY_PROP_STATUS:
+ ret = bq25980_set_chg_en(bq, val->intval);
+ if (ret)
+ return ret;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+static int bq25980_get_charger_property(struct power_supply *psy,
+ enum power_supply_property psp,
+ union power_supply_propval *val)
+{
+ struct bq25980_device *bq = power_supply_get_drvdata(psy);
+ struct bq25980_state state;
+ int ret = 0;
+
+ mutex_lock(&bq->lock);
+ ret = bq25980_get_state(bq, &state);
+ mutex_unlock(&bq->lock);
+ if (ret)
+ return ret;
+
+ switch (psp) {
+ case POWER_SUPPLY_PROP_MANUFACTURER:
+ val->strval = BQ25980_MANUFACTURER;
+ break;
+ case POWER_SUPPLY_PROP_MODEL_NAME:
+ val->strval = bq->model_name;
+ break;
+ case POWER_SUPPLY_PROP_ONLINE:
+ val->intval = state.online;
+ break;
+
+ case POWER_SUPPLY_PROP_INPUT_VOLTAGE_LIMIT:
+ ret = bq25980_get_input_volt_lim(bq);
+ if (ret < 0)
+ return ret;
+ val->intval = ret;
+ break;
+
+ case POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT:
+ ret = bq25980_get_input_curr_lim(bq);
+ if (ret < 0)
+ return ret;
+
+ val->intval = ret;
+ break;
+
+ case POWER_SUPPLY_PROP_HEALTH:
+ val->intval = POWER_SUPPLY_HEALTH_GOOD;
+
+ if (state.tflt)
+ val->intval = POWER_SUPPLY_HEALTH_OVERHEAT;
+ else if (state.ovp)
+ val->intval = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
+ else if (state.ocp)
+ val->intval = POWER_SUPPLY_HEALTH_OVERCURRENT;
+ else if (state.wdt)
+ val->intval =
+ POWER_SUPPLY_HEALTH_WATCHDOG_TIMER_EXPIRE;
+ break;
+
+ case POWER_SUPPLY_PROP_STATUS:
+ val->intval = POWER_SUPPLY_STATUS_UNKNOWN;
+
+ if ((state.ce) && (!state.hiz))
+ val->intval = POWER_SUPPLY_STATUS_CHARGING;
+ else if (state.dischg)
+ val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+ else if (!state.ce)
+ val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
+ break;
+
+ case POWER_SUPPLY_PROP_CHARGE_TYPE:
+ val->intval = POWER_SUPPLY_CHARGE_TYPE_UNKNOWN;
+
+ if (!state.ce)
+ val->intval = POWER_SUPPLY_CHARGE_TYPE_NONE;
+ else if (state.bypass)
+ val->intval = POWER_SUPPLY_CHARGE_TYPE_FAST;
+ else if (!state.bypass)
+ val->intval = POWER_SUPPLY_CHARGE_TYPE_STANDARD;
+ break;
+
+ case POWER_SUPPLY_PROP_CURRENT_NOW:
+ ret = bq25980_get_adc_ibus(bq);
+ if (ret < 0)
+ return ret;
+
+ val->intval = ret;
+ break;
+
+ case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+ ret = bq25980_get_adc_vbus(bq);
+ if (ret < 0)
+ return ret;
+
+ val->intval = ret;
+ break;
+
+ case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT:
+ ret = bq25980_get_const_charge_curr(bq);
+ if (ret < 0)
+ return ret;
+
+ val->intval = ret;
+ break;
+
+ case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE:
+ ret = bq25980_get_const_charge_volt(bq);
+ if (ret < 0)
+ return ret;
+
+ val->intval = ret;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+static bool bq25980_state_changed(struct bq25980_device *bq,
+ struct bq25980_state *new_state)
+{
+ struct bq25980_state old_state;
+
+ mutex_lock(&bq->lock);
+ old_state = bq->state;
+ mutex_unlock(&bq->lock);
+
+ return (old_state.dischg != new_state->dischg ||
+ old_state.ovp != new_state->ovp ||
+ old_state.ocp != new_state->ocp ||
+ old_state.online != new_state->online ||
+ old_state.wdt != new_state->wdt ||
+ old_state.tflt != new_state->tflt ||
+ old_state.ce != new_state->ce ||
+ old_state.hiz != new_state->hiz ||
+ old_state.bypass != new_state->bypass);
+}
+
+static irqreturn_t bq25980_irq_handler_thread(int irq, void *private)
+{
+ struct bq25980_device *bq = private;
+ struct bq25980_state state;
+ int ret;
+
+ ret = bq25980_get_state(bq, &state);
+ if (ret < 0)
+ goto irq_out;
+
+ if (!bq25980_state_changed(bq, &state))
+ goto irq_out;
+
+ mutex_lock(&bq->lock);
+ bq->state = state;
+ mutex_unlock(&bq->lock);
+
+ power_supply_changed(bq->charger);
+
+irq_out:
+ return IRQ_HANDLED;
+}
+
+static enum power_supply_property bq25980_power_supply_props[] = {
+ POWER_SUPPLY_PROP_MANUFACTURER,
+ POWER_SUPPLY_PROP_MODEL_NAME,
+ POWER_SUPPLY_PROP_STATUS,
+ POWER_SUPPLY_PROP_ONLINE,
+ POWER_SUPPLY_PROP_HEALTH,
+ POWER_SUPPLY_PROP_INPUT_VOLTAGE_LIMIT,
+ POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT,
+ POWER_SUPPLY_PROP_CHARGE_TYPE,
+ POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT,
+ POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE,
+ POWER_SUPPLY_PROP_CURRENT_NOW,
+ POWER_SUPPLY_PROP_VOLTAGE_NOW,
+};
+
+static enum power_supply_property bq25980_battery_props[] = {
+ POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX,
+ POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX,
+ POWER_SUPPLY_PROP_CURRENT_NOW,
+ POWER_SUPPLY_PROP_VOLTAGE_NOW,
+};
+
+static char *bq25980_charger_supplied_to[] = {
+ "main-battery",
+};
+
+static int bq25980_property_is_writeable(struct power_supply *psy,
+ enum power_supply_property prop)
+{
+ switch (prop) {
+ case POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT:
+ case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE:
+ case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT:
+ case POWER_SUPPLY_PROP_CHARGE_TYPE:
+ case POWER_SUPPLY_PROP_STATUS:
+ case POWER_SUPPLY_PROP_INPUT_VOLTAGE_LIMIT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static const struct power_supply_desc bq25980_power_supply_desc = {
+ .name = "bq25980-charger",
+ .type = POWER_SUPPLY_TYPE_MAINS,
+ .properties = bq25980_power_supply_props,
+ .num_properties = ARRAY_SIZE(bq25980_power_supply_props),
+ .get_property = bq25980_get_charger_property,
+ .set_property = bq25980_set_charger_property,
+ .property_is_writeable = bq25980_property_is_writeable,
+};
+
+static struct power_supply_desc bq25980_battery_desc = {
+ .name = "bq25980-battery",
+ .type = POWER_SUPPLY_TYPE_BATTERY,
+ .get_property = bq25980_get_battery_property,
+ .set_property = bq25980_set_battery_property,
+ .properties = bq25980_battery_props,
+ .num_properties = ARRAY_SIZE(bq25980_battery_props),
+ .property_is_writeable = bq25980_property_is_writeable,
+};
+
+
+static bool bq25980_is_volatile_reg(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case BQ25980_CHRGR_CTRL_2:
+ case BQ25980_STAT1...BQ25980_FLAG5:
+ case BQ25980_ADC_CONTROL1...BQ25980_TDIE_ADC_LSB:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static const struct regmap_config bq25980_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 8,
+
+ .max_register = BQ25980_CHRGR_CTRL_6,
+ .reg_defaults = bq25980_reg_defs,
+ .num_reg_defaults = ARRAY_SIZE(bq25980_reg_defs),
+ .cache_type = REGCACHE_RBTREE,
+ .volatile_reg = bq25980_is_volatile_reg,
+};
+
+static const struct regmap_config bq25975_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 8,
+
+ .max_register = BQ25980_CHRGR_CTRL_6,
+ .reg_defaults = bq25975_reg_defs,
+ .num_reg_defaults = ARRAY_SIZE(bq25975_reg_defs),
+ .cache_type = REGCACHE_RBTREE,
+ .volatile_reg = bq25980_is_volatile_reg,
+};
+
+static const struct regmap_config bq25960_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 8,
+
+ .max_register = BQ25980_CHRGR_CTRL_6,
+ .reg_defaults = bq25960_reg_defs,
+ .num_reg_defaults = ARRAY_SIZE(bq25960_reg_defs),
+ .cache_type = REGCACHE_RBTREE,
+ .volatile_reg = bq25980_is_volatile_reg,
+};
+
+static const struct bq25980_chip_info bq25980_chip_info_tbl[] = {
+ [BQ25980] = {
+ .model_id = BQ25980,
+ .regmap_config = &bq25980_regmap_config,
+
+ .busocp_def = BQ25980_BUSOCP_DFLT_uA,
+ .busocp_sc_min = BQ25960_BUSOCP_SC_MAX_uA,
+ .busocp_sc_max = BQ25980_BUSOCP_SC_MAX_uA,
+ .busocp_byp_max = BQ25980_BUSOCP_BYP_MAX_uA,
+ .busocp_byp_min = BQ25980_BUSOCP_MIN_uA,
+
+ .busovp_sc_def = BQ25980_BUSOVP_DFLT_uV,
+ .busovp_byp_def = BQ25980_BUSOVP_BYPASS_DFLT_uV,
+ .busovp_sc_step = BQ25980_BUSOVP_SC_STEP_uV,
+ .busovp_sc_offset = BQ25980_BUSOVP_SC_OFFSET_uV,
+ .busovp_byp_step = BQ25980_BUSOVP_BYP_STEP_uV,
+ .busovp_byp_offset = BQ25980_BUSOVP_BYP_OFFSET_uV,
+ .busovp_sc_min = BQ25980_BUSOVP_SC_MIN_uV,
+ .busovp_sc_max = BQ25980_BUSOVP_SC_MAX_uV,
+ .busovp_byp_min = BQ25980_BUSOVP_BYP_MIN_uV,
+ .busovp_byp_max = BQ25980_BUSOVP_BYP_MAX_uV,
+
+ .batovp_def = BQ25980_BATOVP_DFLT_uV,
+ .batovp_max = BQ25980_BATOVP_MAX_uV,
+ .batovp_min = BQ25980_BATOVP_MIN_uV,
+ .batovp_step = BQ25980_BATOVP_STEP_uV,
+ .batovp_offset = BQ25980_BATOVP_OFFSET_uV,
+
+ .batocp_def = BQ25980_BATOCP_DFLT_uA,
+ .batocp_max = BQ25980_BATOCP_MAX_uA,
+ },
+
+ [BQ25975] = {
+ .model_id = BQ25975,
+ .regmap_config = &bq25975_regmap_config,
+
+ .busocp_def = BQ25975_BUSOCP_DFLT_uA,
+ .busocp_sc_min = BQ25975_BUSOCP_SC_MAX_uA,
+ .busocp_sc_max = BQ25975_BUSOCP_SC_MAX_uA,
+ .busocp_byp_min = BQ25980_BUSOCP_MIN_uA,
+ .busocp_byp_max = BQ25975_BUSOCP_BYP_MAX_uA,
+
+ .busovp_sc_def = BQ25975_BUSOVP_DFLT_uV,
+ .busovp_byp_def = BQ25975_BUSOVP_BYPASS_DFLT_uV,
+ .busovp_sc_step = BQ25975_BUSOVP_SC_STEP_uV,
+ .busovp_sc_offset = BQ25975_BUSOVP_SC_OFFSET_uV,
+ .busovp_byp_step = BQ25975_BUSOVP_BYP_STEP_uV,
+ .busovp_byp_offset = BQ25975_BUSOVP_BYP_OFFSET_uV,
+ .busovp_sc_min = BQ25975_BUSOVP_SC_MIN_uV,
+ .busovp_sc_max = BQ25975_BUSOVP_SC_MAX_uV,
+ .busovp_byp_min = BQ25975_BUSOVP_BYP_MIN_uV,
+ .busovp_byp_max = BQ25975_BUSOVP_BYP_MAX_uV,
+
+ .batovp_def = BQ25975_BATOVP_DFLT_uV,
+ .batovp_max = BQ25975_BATOVP_MAX_uV,
+ .batovp_min = BQ25975_BATOVP_MIN_uV,
+ .batovp_step = BQ25975_BATOVP_STEP_uV,
+ .batovp_offset = BQ25975_BATOVP_OFFSET_uV,
+
+ .batocp_def = BQ25980_BATOCP_DFLT_uA,
+ .batocp_max = BQ25980_BATOCP_MAX_uA,
+ },
+
+ [BQ25960] = {
+ .model_id = BQ25960,
+ .regmap_config = &bq25960_regmap_config,
+
+ .busocp_def = BQ25960_BUSOCP_DFLT_uA,
+ .busocp_sc_min = BQ25960_BUSOCP_SC_MAX_uA,
+ .busocp_sc_max = BQ25960_BUSOCP_SC_MAX_uA,
+ .busocp_byp_min = BQ25960_BUSOCP_SC_MAX_uA,
+ .busocp_byp_max = BQ25960_BUSOCP_BYP_MAX_uA,
+
+ .busovp_sc_def = BQ25975_BUSOVP_DFLT_uV,
+ .busovp_byp_def = BQ25975_BUSOVP_BYPASS_DFLT_uV,
+ .busovp_sc_step = BQ25960_BUSOVP_SC_STEP_uV,
+ .busovp_sc_offset = BQ25960_BUSOVP_SC_OFFSET_uV,
+ .busovp_byp_step = BQ25960_BUSOVP_BYP_STEP_uV,
+ .busovp_byp_offset = BQ25960_BUSOVP_BYP_OFFSET_uV,
+ .busovp_sc_min = BQ25960_BUSOVP_SC_MIN_uV,
+ .busovp_sc_max = BQ25960_BUSOVP_SC_MAX_uV,
+ .busovp_byp_min = BQ25960_BUSOVP_BYP_MIN_uV,
+ .busovp_byp_max = BQ25960_BUSOVP_BYP_MAX_uV,
+
+ .batovp_def = BQ25960_BATOVP_DFLT_uV,
+ .batovp_max = BQ25960_BATOVP_MAX_uV,
+ .batovp_min = BQ25960_BATOVP_MIN_uV,
+ .batovp_step = BQ25960_BATOVP_STEP_uV,
+ .batovp_offset = BQ25960_BATOVP_OFFSET_uV,
+
+ .batocp_def = BQ25960_BATOCP_DFLT_uA,
+ .batocp_max = BQ25960_BATOCP_MAX_uA,
+ },
+};
+
+static int bq25980_power_supply_init(struct bq25980_device *bq,
+ struct device *dev)
+{
+ struct power_supply_config psy_cfg = { .drv_data = bq,
+ .of_node = dev->of_node, };
+
+ psy_cfg.supplied_to = bq25980_charger_supplied_to;
+ psy_cfg.num_supplicants = ARRAY_SIZE(bq25980_charger_supplied_to);
+
+ bq->charger = devm_power_supply_register(bq->dev,
+ &bq25980_power_supply_desc,
+ &psy_cfg);
+ if (IS_ERR(bq->charger))
+ return -EINVAL;
+
+ bq->battery = devm_power_supply_register(bq->dev,
+ &bq25980_battery_desc,
+ &psy_cfg);
+ if (IS_ERR(bq->battery))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int bq25980_hw_init(struct bq25980_device *bq)
+{
+ struct power_supply_battery_info bat_info = { };
+ int wd_reg_val = BQ25980_WATCHDOG_DIS;
+ int wd_max_val = BQ25980_NUM_WD_VAL - 1;
+ int ret = 0;
+ int curr_val;
+ int volt_val;
+ int i;
+
+ if (bq->watchdog_timer) {
+ if (bq->watchdog_timer >= bq25980_watchdog_time[wd_max_val])
+ wd_reg_val = wd_max_val;
+ else {
+ for (i = 0; i < wd_max_val; i++) {
+ if (bq->watchdog_timer > bq25980_watchdog_time[i] &&
+ bq->watchdog_timer < bq25980_watchdog_time[i + 1]) {
+ wd_reg_val = i;
+ break;
+ }
+ }
+ }
+ }
+
+ ret = regmap_update_bits(bq->regmap, BQ25980_CHRGR_CTRL_3,
+ BQ25980_WATCHDOG_MASK, wd_reg_val);
+ if (ret)
+ return ret;
+
+ ret = power_supply_get_battery_info(bq->charger, &bat_info);
+ if (ret) {
+ dev_warn(bq->dev, "battery info missing\n");
+ return -EINVAL;
+ }
+
+ bq->init_data.ichg_max = bat_info.constant_charge_current_max_ua;
+ bq->init_data.vreg_max = bat_info.constant_charge_voltage_max_uv;
+
+ if (bq->state.bypass) {
+ ret = regmap_update_bits(bq->regmap, BQ25980_CHRGR_CTRL_2,
+ BQ25980_EN_BYPASS, BQ25980_EN_BYPASS);
+ if (ret)
+ return ret;
+
+ curr_val = bq->init_data.bypass_ilim;
+ volt_val = bq->init_data.bypass_vlim;
+ } else {
+ curr_val = bq->init_data.sc_ilim;
+ volt_val = bq->init_data.sc_vlim;
+ }
+
+ ret = bq25980_set_input_curr_lim(bq, curr_val);
+ if (ret)
+ return ret;
+
+ ret = bq25980_set_input_volt_lim(bq, volt_val);
+ if (ret)
+ return ret;
+
+ return regmap_update_bits(bq->regmap, BQ25980_ADC_CONTROL1,
+ BQ25980_ADC_EN, BQ25980_ADC_EN);
+}
+
+static int bq25980_parse_dt(struct bq25980_device *bq)
+{
+ int ret;
+
+ ret = device_property_read_u32(bq->dev, "ti,watchdog-timeout-ms",
+ &bq->watchdog_timer);
+ if (ret)
+ bq->watchdog_timer = BQ25980_WATCHDOG_MIN;
+
+ if (bq->watchdog_timer > BQ25980_WATCHDOG_MAX ||
+ bq->watchdog_timer < BQ25980_WATCHDOG_MIN)
+ return -EINVAL;
+
+ ret = device_property_read_u32(bq->dev,
+ "ti,sc-ovp-limit-microvolt",
+ &bq->init_data.sc_vlim);
+ if (ret)
+ bq->init_data.sc_vlim = bq->chip_info->busovp_sc_def;
+
+ if (bq->init_data.sc_vlim > bq->chip_info->busovp_sc_max ||
+ bq->init_data.sc_vlim < bq->chip_info->busovp_sc_min) {
+ dev_err(bq->dev, "SC ovp limit is out of range\n");
+ return -EINVAL;
+ }
+
+ ret = device_property_read_u32(bq->dev,
+ "ti,sc-ocp-limit-microamp",
+ &bq->init_data.sc_ilim);
+ if (ret)
+ bq->init_data.sc_ilim = bq->chip_info->busocp_def;
+
+ if (bq->init_data.sc_ilim > bq->chip_info->busocp_sc_max ||
+ bq->init_data.sc_ilim < bq->chip_info->busocp_sc_min) {
+ dev_err(bq->dev, "SC ocp limit is out of range\n");
+ return -EINVAL;
+ }
+
+ ret = device_property_read_u32(bq->dev,
+ "ti,bypass-ovp-limit-microvolt",
+ &bq->init_data.bypass_vlim);
+ if (ret)
+ bq->init_data.bypass_vlim = bq->chip_info->busovp_byp_def;
+
+ if (bq->init_data.bypass_vlim > bq->chip_info->busovp_byp_max ||
+ bq->init_data.bypass_vlim < bq->chip_info->busovp_byp_min) {
+ dev_err(bq->dev, "Bypass ovp limit is out of range\n");
+ return -EINVAL;
+ }
+
+ ret = device_property_read_u32(bq->dev,
+ "ti,bypass-ocp-limit-microamp",
+ &bq->init_data.bypass_ilim);
+ if (ret)
+ bq->init_data.bypass_ilim = bq->chip_info->busocp_def;
+
+ if (bq->init_data.bypass_ilim > bq->chip_info->busocp_byp_max ||
+ bq->init_data.bypass_ilim < bq->chip_info->busocp_byp_min) {
+ dev_err(bq->dev, "Bypass ocp limit is out of range\n");
+ return -EINVAL;
+ }
+
+
+ bq->state.bypass = device_property_read_bool(bq->dev,
+ "ti,bypass-enable");
+ return 0;
+}
+
+static int bq25980_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct device *dev = &client->dev;
+ struct bq25980_device *bq;
+ int ret;
+
+ bq = devm_kzalloc(dev, sizeof(*bq), GFP_KERNEL);
+ if (!bq)
+ return -ENOMEM;
+
+ bq->client = client;
+ bq->dev = dev;
+
+ mutex_init(&bq->lock);
+
+ strncpy(bq->model_name, id->name, I2C_NAME_SIZE);
+ bq->chip_info = &bq25980_chip_info_tbl[id->driver_data];
+
+ bq->regmap = devm_regmap_init_i2c(client,
+ bq->chip_info->regmap_config);
+ if (IS_ERR(bq->regmap)) {
+ dev_err(dev, "Failed to allocate register map\n");
+ return PTR_ERR(bq->regmap);
+ }
+
+ i2c_set_clientdata(client, bq);
+
+ ret = bq25980_parse_dt(bq);
+ if (ret) {
+ dev_err(dev, "Failed to read device tree properties%d\n", ret);
+ return ret;
+ }
+
+ if (client->irq) {
+ ret = devm_request_threaded_irq(dev, client->irq, NULL,
+ bq25980_irq_handler_thread,
+ IRQF_TRIGGER_FALLING |
+ IRQF_ONESHOT,
+ dev_name(&client->dev), bq);
+ if (ret)
+ return ret;
+ }
+
+ ret = bq25980_power_supply_init(bq, dev);
+ if (ret) {
+ dev_err(dev, "Failed to register power supply\n");
+ return ret;
+ }
+
+ ret = bq25980_hw_init(bq);
+ if (ret) {
+ dev_err(dev, "Cannot initialize the chip.\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static const struct i2c_device_id bq25980_i2c_ids[] = {
+ { "bq25980", BQ25980 },
+ { "bq25975", BQ25975 },
+ { "bq25975", BQ25975 },
+ {},
+};
+MODULE_DEVICE_TABLE(i2c, bq25980_i2c_ids);
+
+static const struct of_device_id bq25980_of_match[] = {
+ { .compatible = "ti,bq25980", .data = (void *)BQ25980 },
+ { .compatible = "ti,bq25975", .data = (void *)BQ25975 },
+ { .compatible = "ti,bq25960", .data = (void *)BQ25960 },
+ { },
+};
+MODULE_DEVICE_TABLE(of, bq25980_of_match);
+
+static struct i2c_driver bq25980_driver = {
+ .driver = {
+ .name = "bq25980-charger",
+ .of_match_table = bq25980_of_match,
+ },
+ .probe = bq25980_probe,
+ .id_table = bq25980_i2c_ids,
+};
+module_i2c_driver(bq25980_driver);
+
+MODULE_AUTHOR("Dan Murphy <dmurphy@ti.com>");
+MODULE_AUTHOR("Ricardo Rivera-Matos <r-rivera-matos@ti.com>");
+MODULE_DESCRIPTION("bq25980 charger driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/power/supply/bq25980_charger.h b/drivers/power/supply/bq25980_charger.h
new file mode 100644
index 000000000000..39f94eba5f6c
--- /dev/null
+++ b/drivers/power/supply/bq25980_charger.h
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2020 Texas Instruments Incorporated - http://www.ti.com/ */
+
+#ifndef BQ25980_CHARGER_H
+#define BQ25980_CHARGER_H
+
+#define BQ25980_MANUFACTURER "Texas Instruments"
+
+#define BQ25980_BATOVP 0x0
+#define BQ25980_BATOVP_ALM 0x1
+#define BQ25980_BATOCP 0x2
+#define BQ25980_BATOCP_ALM 0x3
+#define BQ25980_BATUCP_ALM 0x4
+#define BQ25980_CHRGR_CTRL_1 0x5
+#define BQ25980_BUSOVP 0x6
+#define BQ25980_BUSOVP_ALM 0x7
+#define BQ25980_BUSOCP 0x8
+#define BQ25980_BUSOCP_ALM 0x9
+#define BQ25980_TEMP_CONTROL 0xA
+#define BQ25980_TDIE_ALM 0xB
+#define BQ25980_TSBUS_FLT 0xC
+#define BQ25980_TSBAT_FLG 0xD
+#define BQ25980_VAC_CONTROL 0xE
+#define BQ25980_CHRGR_CTRL_2 0xF
+#define BQ25980_CHRGR_CTRL_3 0x10
+#define BQ25980_CHRGR_CTRL_4 0x11
+#define BQ25980_CHRGR_CTRL_5 0x12
+#define BQ25980_STAT1 0x13
+#define BQ25980_STAT2 0x14
+#define BQ25980_STAT3 0x15
+#define BQ25980_STAT4 0x16
+#define BQ25980_STAT5 0x17
+#define BQ25980_FLAG1 0x18
+#define BQ25980_FLAG2 0x19
+#define BQ25980_FLAG3 0x1A
+#define BQ25980_FLAG4 0x1B
+#define BQ25980_FLAG5 0x1C
+#define BQ25980_MASK1 0x1D
+#define BQ25980_MASK2 0x1E
+#define BQ25980_MASK3 0x1F
+#define BQ25980_MASK4 0x20
+#define BQ25980_MASK5 0x21
+#define BQ25980_DEVICE_INFO 0x22
+#define BQ25980_ADC_CONTROL1 0x23
+#define BQ25980_ADC_CONTROL2 0x24
+#define BQ25980_IBUS_ADC_MSB 0x25
+#define BQ25980_IBUS_ADC_LSB 0x26
+#define BQ25980_VBUS_ADC_MSB 0x27
+#define BQ25980_VBUS_ADC_LSB 0x28
+#define BQ25980_VAC1_ADC_MSB 0x29
+#define BQ25980_VAC1_ADC_LSB 0x2A
+#define BQ25980_VAC2_ADC_MSB 0x2B
+#define BQ25980_VAC2_ADC_LSB 0x2C
+#define BQ25980_VOUT_ADC_MSB 0x2D
+#define BQ25980_VOUT_ADC_LSB 0x2E
+#define BQ25980_VBAT_ADC_MSB 0x2F
+#define BQ25980_VBAT_ADC_LSB 0x30
+#define BQ25980_IBAT_ADC_MSB 0x31
+#define BQ25980_IBAT_ADC_LSB 0x32
+#define BQ25980_TSBUS_ADC_MSB 0x33
+#define BQ25980_TSBUS_ADC_LSB 0x34
+#define BQ25980_TSBAT_ADC_MSB 0x35
+#define BQ25980_TSBAT_ADC_LSB 0x36
+#define BQ25980_TDIE_ADC_MSB 0x37
+#define BQ25980_TDIE_ADC_LSB 0x38
+#define BQ25980_DEGLITCH_TIME 0x39
+#define BQ25980_CHRGR_CTRL_6 0x3A
+
+#define BQ25980_BUSOCP_STEP_uA 250000
+#define BQ25980_BUSOCP_OFFSET_uA 1000000
+
+#define BQ25980_BUSOCP_DFLT_uA 4250000
+#define BQ25975_BUSOCP_DFLT_uA 4250000
+#define BQ25960_BUSOCP_DFLT_uA 3250000
+
+#define BQ25980_BUSOCP_MIN_uA 1000000
+
+#define BQ25980_BUSOCP_SC_MAX_uA 5750000
+#define BQ25975_BUSOCP_SC_MAX_uA 5750000
+#define BQ25960_BUSOCP_SC_MAX_uA 3750000
+
+#define BQ25980_BUSOCP_BYP_MAX_uA 8500000
+#define BQ25975_BUSOCP_BYP_MAX_uA 8500000
+#define BQ25960_BUSOCP_BYP_MAX_uA 5750000
+
+#define BQ25980_BUSOVP_SC_STEP_uV 100000
+#define BQ25975_BUSOVP_SC_STEP_uV 50000
+#define BQ25960_BUSOVP_SC_STEP_uV 50000
+#define BQ25980_BUSOVP_SC_OFFSET_uV 14000000
+#define BQ25975_BUSOVP_SC_OFFSET_uV 7000000
+#define BQ25960_BUSOVP_SC_OFFSET_uV 7000000
+
+#define BQ25980_BUSOVP_BYP_STEP_uV 50000
+#define BQ25975_BUSOVP_BYP_STEP_uV 25000
+#define BQ25960_BUSOVP_BYP_STEP_uV 25000
+#define BQ25980_BUSOVP_BYP_OFFSET_uV 7000000
+#define BQ25975_BUSOVP_BYP_OFFSET_uV 3500000
+#define BQ25960_BUSOVP_BYP_OFFSET_uV 3500000
+
+#define BQ25980_BUSOVP_DFLT_uV 17800000
+#define BQ25980_BUSOVP_BYPASS_DFLT_uV 8900000
+#define BQ25975_BUSOVP_DFLT_uV 8900000
+#define BQ25975_BUSOVP_BYPASS_DFLT_uV 4450000
+#define BQ25960_BUSOVP_DFLT_uV 8900000
+
+#define BQ25980_BUSOVP_SC_MIN_uV 14000000
+#define BQ25975_BUSOVP_SC_MIN_uV 7000000
+#define BQ25960_BUSOVP_SC_MIN_uV 7000000
+#define BQ25980_BUSOVP_BYP_MIN_uV 7000000
+#define BQ25975_BUSOVP_BYP_MIN_uV 3500000
+#define BQ25960_BUSOVP_BYP_MIN_uV 3500000
+
+#define BQ25980_BUSOVP_SC_MAX_uV 22000000
+#define BQ25975_BUSOVP_SC_MAX_uV 12750000
+#define BQ25960_BUSOVP_SC_MAX_uV 12750000
+
+#define BQ25980_BUSOVP_BYP_MAX_uV 12750000
+#define BQ25975_BUSOVP_BYP_MAX_uV 6500000
+#define BQ25960_BUSOVP_BYP_MAX_uV 6500000
+
+#define BQ25980_BATOVP_STEP_uV 20000
+#define BQ25975_BATOVP_STEP_uV 10000
+#define BQ25960_BATOVP_STEP_uV 10000
+
+#define BQ25980_BATOVP_OFFSET_uV 7000000
+#define BQ25975_BATOVP_OFFSET_uV 3500000
+#define BQ25960_BATOVP_OFFSET_uV 3500000
+
+#define BQ25980_BATOVP_DFLT_uV 14000000
+#define BQ25975_BATOVP_DFLT_uV 8900000
+#define BQ25960_BATOVP_DFLT_uV 8900000
+
+#define BQ25980_BATOVP_MIN_uV 7000000
+#define BQ25975_BATOVP_MIN_uV 3500000
+#define BQ25960_BATOVP_MIN_uV 3500000
+
+#define BQ25980_BATOVP_MAX_uV 9540000
+#define BQ25975_BATOVP_MAX_uV 4770000
+#define BQ25960_BATOVP_MAX_uV 4770000
+
+#define BQ25980_BATOCP_STEP_uA 100000
+
+#define BQ25980_BATOCP_MASK GENMASK(6, 0)
+
+#define BQ25980_BATOCP_DFLT_uA 8100000
+#define BQ25960_BATOCP_DFLT_uA 6100000
+
+#define BQ25980_BATOCP_MIN_uA 2000000
+
+#define BQ25980_BATOCP_MAX_uA 11000000
+#define BQ25975_BATOCP_MAX_uA 11000000
+#define BQ25960_BATOCP_MAX_uA 7000000
+
+#define BQ25980_ENABLE_HIZ 0xff
+#define BQ25980_DISABLE_HIZ 0x0
+#define BQ25980_EN_BYPASS BIT(3)
+#define BQ25980_STAT1_OVP_MASK (BIT(6) | BIT(5) | BIT(0))
+#define BQ25980_STAT3_OVP_MASK (BIT(7) | BIT(6))
+#define BQ25980_STAT1_OCP_MASK BIT(3)
+#define BQ25980_STAT2_OCP_MASK (BIT(6) | BIT(1))
+#define BQ25980_STAT4_TFLT_MASK GENMASK(5, 1)
+#define BQ25980_WD_STAT BIT(0)
+#define BQ25980_PRESENT_MASK GENMASK(4, 2)
+#define BQ25980_CHG_EN BIT(4)
+#define BQ25980_EN_HIZ BIT(6)
+#define BQ25980_ADC_EN BIT(7)
+
+#define BQ25980_ADC_VOLT_STEP_uV 1000
+#define BQ25980_ADC_CURR_STEP_uA 1000
+#define BQ25980_ADC_POLARITY_BIT BIT(7)
+
+#define BQ25980_WATCHDOG_MASK GENMASK(4, 3)
+#define BQ25980_WATCHDOG_DIS BIT(2)
+#define BQ25980_WATCHDOG_MAX 300000
+#define BQ25980_WATCHDOG_MIN 0
+#define BQ25980_NUM_WD_VAL 4
+
+#endif /* BQ25980_CHARGER_H */
diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c
index a123f6e21f08..315e0909e6a4 100644
--- a/drivers/power/supply/bq27xxx_battery.c
+++ b/drivers/power/supply/bq27xxx_battery.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* BQ27xxx battery driver
*
@@ -9,14 +10,6 @@
*
* Based on a previous work by Copyright (C) 2008 Texas Instruments, Inc.
*
- * This package is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * THIS PACKAGE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
- * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
- *
* Datasheets:
* https://www.ti.com/product/bq27000
* https://www.ti.com/product/bq27200
@@ -45,6 +38,7 @@
* https://www.ti.com/product/bq27621-g1
* https://www.ti.com/product/bq27z561
* https://www.ti.com/product/bq28z610
+ * https://www.ti.com/product/bq34z100-g1
*/
#include <linux/device.h>
@@ -83,7 +77,7 @@
/* BQ27Z561 has different layout for Flags register */
#define BQ27Z561_FLAG_FDC BIT(4) /* Battery fully discharged */
-#define BQ27Z561_FLAG_FC BIT(5) /* Battery fully charged */
+#define BQ27Z561_FLAG_FC BIT(5) /* Battery fully charged */
#define BQ27Z561_FLAG_DIS_CH BIT(6) /* Battery is discharging */
/* control register params */
@@ -483,6 +477,26 @@ static u8
[BQ27XXX_REG_DCAP] = 0x3c,
[BQ27XXX_REG_AP] = 0x22,
BQ27XXX_DM_REG_ROWS,
+ },
+ bq34z100_regs[BQ27XXX_REG_MAX] = {
+ [BQ27XXX_REG_CTRL] = 0x00,
+ [BQ27XXX_REG_TEMP] = 0x0c,
+ [BQ27XXX_REG_INT_TEMP] = 0x2a,
+ [BQ27XXX_REG_VOLT] = 0x08,
+ [BQ27XXX_REG_AI] = 0x0a,
+ [BQ27XXX_REG_FLAGS] = 0x0e,
+ [BQ27XXX_REG_TTE] = 0x18,
+ [BQ27XXX_REG_TTF] = 0x1a,
+ [BQ27XXX_REG_TTES] = 0x1e,
+ [BQ27XXX_REG_TTECP] = INVALID_REG_ADDR,
+ [BQ27XXX_REG_NAC] = INVALID_REG_ADDR,
+ [BQ27XXX_REG_FCC] = 0x06,
+ [BQ27XXX_REG_CYCT] = 0x2c,
+ [BQ27XXX_REG_AE] = 0x24,
+ [BQ27XXX_REG_SOC] = 0x02,
+ [BQ27XXX_REG_DCAP] = 0x3c,
+ [BQ27XXX_REG_AP] = 0x22,
+ BQ27XXX_DM_REG_ROWS,
};
static enum power_supply_property bq27000_props[] = {
@@ -757,6 +771,27 @@ static enum power_supply_property bq28z610_props[] = {
POWER_SUPPLY_PROP_MANUFACTURER,
};
+static enum power_supply_property bq34z100_props[] = {
+ POWER_SUPPLY_PROP_STATUS,
+ POWER_SUPPLY_PROP_PRESENT,
+ POWER_SUPPLY_PROP_VOLTAGE_NOW,
+ POWER_SUPPLY_PROP_CURRENT_NOW,
+ POWER_SUPPLY_PROP_CAPACITY,
+ POWER_SUPPLY_PROP_CAPACITY_LEVEL,
+ POWER_SUPPLY_PROP_TEMP,
+ POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW,
+ POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG,
+ POWER_SUPPLY_PROP_TIME_TO_FULL_NOW,
+ POWER_SUPPLY_PROP_TECHNOLOGY,
+ POWER_SUPPLY_PROP_CHARGE_FULL,
+ POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN,
+ POWER_SUPPLY_PROP_CYCLE_COUNT,
+ POWER_SUPPLY_PROP_ENERGY_NOW,
+ POWER_SUPPLY_PROP_POWER_AVG,
+ POWER_SUPPLY_PROP_HEALTH,
+ POWER_SUPPLY_PROP_MANUFACTURER,
+};
+
struct bq27xxx_dm_reg {
u8 subclass_id;
u8 offset;
@@ -854,13 +889,17 @@ static struct bq27xxx_dm_reg bq27621_dm_regs[] = {
#define bq27z561_dm_regs 0
#define bq28z610_dm_regs 0
-
-#define BQ27XXX_O_ZERO 0x00000001
-#define BQ27XXX_O_OTDC 0x00000002 /* has OTC/OTD overtemperature flags */
-#define BQ27XXX_O_UTOT 0x00000004 /* has OT overtemperature flag */
-#define BQ27XXX_O_CFGUP 0x00000008
-#define BQ27XXX_O_RAM 0x00000010
-#define BQ27Z561_O_BITS 0x00000020
+#define bq34z100_dm_regs 0
+
+#define BQ27XXX_O_ZERO BIT(0)
+#define BQ27XXX_O_OTDC BIT(1) /* has OTC/OTD overtemperature flags */
+#define BQ27XXX_O_UTOT BIT(2) /* has OT overtemperature flag */
+#define BQ27XXX_O_CFGUP BIT(3)
+#define BQ27XXX_O_RAM BIT(4)
+#define BQ27Z561_O_BITS BIT(5)
+#define BQ27XXX_O_SOC_SI BIT(6) /* SoC is single register */
+#define BQ27XXX_O_HAS_CI BIT(7) /* has Capacity Inaccurate flag */
+#define BQ27XXX_O_MUL_CHEM BIT(8) /* multiple chemistries supported */
#define BQ27XXX_DATA(ref, key, opt) { \
.opts = (opt), \
@@ -878,8 +917,8 @@ static struct {
enum power_supply_property *props;
size_t props_size;
} bq27xxx_chip_data[] = {
- [BQ27000] = BQ27XXX_DATA(bq27000, 0 , BQ27XXX_O_ZERO),
- [BQ27010] = BQ27XXX_DATA(bq27010, 0 , BQ27XXX_O_ZERO),
+ [BQ27000] = BQ27XXX_DATA(bq27000, 0 , BQ27XXX_O_ZERO | BQ27XXX_O_SOC_SI | BQ27XXX_O_HAS_CI),
+ [BQ27010] = BQ27XXX_DATA(bq27010, 0 , BQ27XXX_O_ZERO | BQ27XXX_O_SOC_SI | BQ27XXX_O_HAS_CI),
[BQ2750X] = BQ27XXX_DATA(bq2750x, 0 , BQ27XXX_O_OTDC),
[BQ2751X] = BQ27XXX_DATA(bq2751x, 0 , BQ27XXX_O_OTDC),
[BQ2752X] = BQ27XXX_DATA(bq2752x, 0 , BQ27XXX_O_OTDC),
@@ -907,6 +946,8 @@ static struct {
[BQ27621] = BQ27XXX_DATA(bq27621, 0x80008000, BQ27XXX_O_UTOT | BQ27XXX_O_CFGUP | BQ27XXX_O_RAM),
[BQ27Z561] = BQ27XXX_DATA(bq27z561, 0 , BQ27Z561_O_BITS),
[BQ28Z610] = BQ27XXX_DATA(bq28z610, 0 , BQ27Z561_O_BITS),
+ [BQ34Z100] = BQ27XXX_DATA(bq34z100, 0 , BQ27XXX_O_OTDC | BQ27XXX_O_SOC_SI | \
+ BQ27XXX_O_HAS_CI | BQ27XXX_O_MUL_CHEM),
};
static DEFINE_MUTEX(bq27xxx_list_lock);
@@ -1426,7 +1467,7 @@ static int bq27xxx_battery_read_soc(struct bq27xxx_device_info *di)
{
int soc;
- if (di->opts & BQ27XXX_O_ZERO)
+ if (di->opts & BQ27XXX_O_SOC_SI)
soc = bq27xxx_read(di, BQ27XXX_REG_SOC, true);
else
soc = bq27xxx_read(di, BQ27XXX_REG_SOC, false);
@@ -1664,7 +1705,7 @@ static int bq27xxx_battery_read_health(struct bq27xxx_device_info *di)
void bq27xxx_battery_update(struct bq27xxx_device_info *di)
{
struct bq27xxx_reg_cache cache = {0, };
- bool has_ci_flag = di->opts & BQ27XXX_O_ZERO;
+ bool has_ci_flag = di->opts & BQ27XXX_O_HAS_CI;
bool has_singe_flag = di->opts & BQ27XXX_O_ZERO;
cache.flags = bq27xxx_read(di, BQ27XXX_REG_FLAGS, has_singe_flag);
@@ -1772,8 +1813,6 @@ static int bq27xxx_battery_status(struct bq27xxx_device_info *di,
status = POWER_SUPPLY_STATUS_FULL;
else if (di->cache.flags & BQ27000_FLAG_CHGS)
status = POWER_SUPPLY_STATUS_CHARGING;
- else if (power_supply_am_i_supplied(di->bat) > 0)
- status = POWER_SUPPLY_STATUS_NOT_CHARGING;
else
status = POWER_SUPPLY_STATUS_DISCHARGING;
} else if (di->opts & BQ27Z561_O_BITS) {
@@ -1792,6 +1831,10 @@ static int bq27xxx_battery_status(struct bq27xxx_device_info *di,
status = POWER_SUPPLY_STATUS_CHARGING;
}
+ if ((status == POWER_SUPPLY_STATUS_DISCHARGING) &&
+ (power_supply_am_i_supplied(di->bat) > 0))
+ status = POWER_SUPPLY_STATUS_NOT_CHARGING;
+
val->intval = status;
return 0;
@@ -1916,7 +1959,10 @@ static int bq27xxx_battery_get_property(struct power_supply *psy,
ret = bq27xxx_simple_value(di->cache.time_to_full, val);
break;
case POWER_SUPPLY_PROP_TECHNOLOGY:
- val->intval = POWER_SUPPLY_TECHNOLOGY_LION;
+ if (di->opts & BQ27XXX_O_MUL_CHEM)
+ val->intval = POWER_SUPPLY_TECHNOLOGY_UNKNOWN;
+ else
+ val->intval = POWER_SUPPLY_TECHNOLOGY_LION;
break;
case POWER_SUPPLY_PROP_CHARGE_NOW:
ret = bq27xxx_simple_value(bq27xxx_battery_read_nac(di), val);
@@ -1992,13 +2038,9 @@ int bq27xxx_battery_setup(struct bq27xxx_device_info *di)
psy_desc->external_power_changed = bq27xxx_external_power_changed;
di->bat = power_supply_register_no_ws(di->dev, psy_desc, &psy_cfg);
- if (IS_ERR(di->bat)) {
- if (PTR_ERR(di->bat) == -EPROBE_DEFER)
- dev_dbg(di->dev, "failed to register battery, deferring probe\n");
- else
- dev_err(di->dev, "failed to register battery\n");
- return PTR_ERR(di->bat);
- }
+ if (IS_ERR(di->bat))
+ return dev_err_probe(di->dev, PTR_ERR(di->bat),
+ "failed to register battery\n");
bq27xxx_battery_settings(di);
bq27xxx_battery_update(di);
diff --git a/drivers/power/supply/bq27xxx_battery_hdq.c b/drivers/power/supply/bq27xxx_battery_hdq.c
index d56b3e19e996..922759ab2e04 100644
--- a/drivers/power/supply/bq27xxx_battery_hdq.c
+++ b/drivers/power/supply/bq27xxx_battery_hdq.c
@@ -1,16 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* BQ27xxx battery monitor HDQ/1-wire driver
*
* Copyright (C) 2007-2017 Texas Instruments Incorporated - https://www.ti.com/
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*/
#include <linux/kernel.h>
diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c
index ab02456d69e5..eb4f4284982f 100644
--- a/drivers/power/supply/bq27xxx_battery_i2c.c
+++ b/drivers/power/supply/bq27xxx_battery_i2c.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* BQ27xxx battery monitor I2C driver
*
* Copyright (C) 2015 Texas Instruments Incorporated - https://www.ti.com/
* Andrew F. Davis <afd@ti.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*/
#include <linux/i2c.h>
@@ -255,6 +247,7 @@ static const struct i2c_device_id bq27xxx_i2c_id_table[] = {
{ "bq27621", BQ27621 },
{ "bq27z561", BQ27Z561 },
{ "bq28z610", BQ28Z610 },
+ { "bq34z100", BQ34Z100 },
{},
};
MODULE_DEVICE_TABLE(i2c, bq27xxx_i2c_id_table);
@@ -290,6 +283,7 @@ static const struct of_device_id bq27xxx_battery_i2c_of_match_table[] = {
{ .compatible = "ti,bq27621" },
{ .compatible = "ti,bq27z561" },
{ .compatible = "ti,bq28z610" },
+ { .compatible = "ti,bq34z100" },
{},
};
MODULE_DEVICE_TABLE(of, bq27xxx_battery_i2c_of_match_table);
diff --git a/drivers/power/supply/charger-manager.c b/drivers/power/supply/charger-manager.c
index 2ef53dc1f2fb..6fcebe441552 100644
--- a/drivers/power/supply/charger-manager.c
+++ b/drivers/power/supply/charger-manager.c
@@ -26,6 +26,29 @@
#include <linux/of.h>
#include <linux/thermal.h>
+static struct {
+ const char *name;
+ u64 extcon_type;
+} extcon_mapping[] = {
+ /* Current textual representations */
+ { "USB", EXTCON_USB },
+ { "USB-HOST", EXTCON_USB_HOST },
+ { "SDP", EXTCON_CHG_USB_SDP },
+ { "DCP", EXTCON_CHG_USB_DCP },
+ { "CDP", EXTCON_CHG_USB_CDP },
+ { "ACA", EXTCON_CHG_USB_ACA },
+ { "FAST-CHARGER", EXTCON_CHG_USB_FAST },
+ { "SLOW-CHARGER", EXTCON_CHG_USB_SLOW },
+ { "WPT", EXTCON_CHG_WPT },
+ { "PD", EXTCON_CHG_USB_PD },
+ { "DOCK", EXTCON_DOCK },
+ { "JIG", EXTCON_JIG },
+ { "MECHANICAL", EXTCON_MECHANICAL },
+ /* Deprecated textual representations */
+ { "TA", EXTCON_CHG_USB_SDP },
+ { "CHARGE-DOWNSTREAM", EXTCON_CHG_USB_CDP },
+};
+
/*
* Default temperature threshold for charging.
* Every temperature units are in tenth of centigrade.
@@ -33,18 +56,6 @@
#define CM_DEFAULT_RECHARGE_TEMP_DIFF 50
#define CM_DEFAULT_CHARGE_TEMP_MAX 500
-static const char * const default_event_names[] = {
- [CM_EVENT_UNKNOWN] = "Unknown",
- [CM_EVENT_BATT_FULL] = "Battery Full",
- [CM_EVENT_BATT_IN] = "Battery Inserted",
- [CM_EVENT_BATT_OUT] = "Battery Pulled Out",
- [CM_EVENT_BATT_OVERHEAT] = "Battery Overheat",
- [CM_EVENT_BATT_COLD] = "Battery Cold",
- [CM_EVENT_EXT_PWR_IN_OUT] = "External Power Attach/Detach",
- [CM_EVENT_CHG_START_STOP] = "Charging Start/Stop",
- [CM_EVENT_OTHERS] = "Other battery events"
-};
-
/*
* Regard CM_JIFFIES_SMALL jiffies is small enough to ignore for
* delayed works so that we can run delayed works with CM_JIFFIES_SMALL
@@ -61,8 +72,6 @@ static const char * const default_event_names[] = {
*/
#define CM_RTC_SMALL (2)
-#define UEVENT_BUF_SIZE 32
-
static LIST_HEAD(cm_list);
static DEFINE_MUTEX(cm_list_mtx);
@@ -285,6 +294,19 @@ static bool is_full_charged(struct charger_manager *cm)
if (!fuel_gauge)
return false;
+ /* Full, if it's over the fullbatt voltage */
+ if (desc->fullbatt_uV > 0) {
+ ret = get_batt_uV(cm, &uV);
+ if (!ret) {
+ /* Battery is already full, checks voltage drop. */
+ if (cm->battery_status == POWER_SUPPLY_STATUS_FULL
+ && desc->fullbatt_vchkdrop_uV)
+ uV += desc->fullbatt_vchkdrop_uV;
+ if (uV >= desc->fullbatt_uV)
+ return true;
+ }
+ }
+
if (desc->fullbatt_full_capacity > 0) {
val.intval = 0;
@@ -297,15 +319,6 @@ static bool is_full_charged(struct charger_manager *cm)
}
}
- /* Full, if it's over the fullbatt voltage */
- if (desc->fullbatt_uV > 0) {
- ret = get_batt_uV(cm, &uV);
- if (!ret && uV >= desc->fullbatt_uV) {
- is_full = true;
- goto out;
- }
- }
-
/* Full, if the capacity is more than fullbatt_soc */
if (desc->fullbatt_soc > 0) {
val.intval = 0;
@@ -427,122 +440,6 @@ static int try_charger_enable(struct charger_manager *cm, bool enable)
}
/**
- * try_charger_restart - Restart charging.
- * @cm: the Charger Manager representing the battery.
- *
- * Restart charging by turning off and on the charger.
- */
-static int try_charger_restart(struct charger_manager *cm)
-{
- int err;
-
- if (cm->emergency_stop)
- return -EAGAIN;
-
- err = try_charger_enable(cm, false);
- if (err)
- return err;
-
- return try_charger_enable(cm, true);
-}
-
-/**
- * uevent_notify - Let users know something has changed.
- * @cm: the Charger Manager representing the battery.
- * @event: the event string.
- *
- * If @event is null, it implies that uevent_notify is called
- * by resume function. When called in the resume function, cm_suspended
- * should be already reset to false in order to let uevent_notify
- * notify the recent event during the suspend to users. While
- * suspended, uevent_notify does not notify users, but tracks
- * events so that uevent_notify can notify users later after resumed.
- */
-static void uevent_notify(struct charger_manager *cm, const char *event)
-{
- static char env_str[UEVENT_BUF_SIZE + 1] = "";
- static char env_str_save[UEVENT_BUF_SIZE + 1] = "";
-
- if (cm_suspended) {
- /* Nothing in suspended-event buffer */
- if (env_str_save[0] == 0) {
- if (!strncmp(env_str, event, UEVENT_BUF_SIZE))
- return; /* status not changed */
- strncpy(env_str_save, event, UEVENT_BUF_SIZE);
- return;
- }
-
- if (!strncmp(env_str_save, event, UEVENT_BUF_SIZE))
- return; /* Duplicated. */
- strncpy(env_str_save, event, UEVENT_BUF_SIZE);
- return;
- }
-
- if (event == NULL) {
- /* No messages pending */
- if (!env_str_save[0])
- return;
-
- strncpy(env_str, env_str_save, UEVENT_BUF_SIZE);
- kobject_uevent(&cm->dev->kobj, KOBJ_CHANGE);
- env_str_save[0] = 0;
-
- return;
- }
-
- /* status not changed */
- if (!strncmp(env_str, event, UEVENT_BUF_SIZE))
- return;
-
- /* save the status and notify the update */
- strncpy(env_str, event, UEVENT_BUF_SIZE);
- kobject_uevent(&cm->dev->kobj, KOBJ_CHANGE);
-
- dev_info(cm->dev, "%s\n", event);
-}
-
-/**
- * fullbatt_vchk - Check voltage drop some times after "FULL" event.
- * @work: the work_struct appointing the function
- *
- * If a user has designated "fullbatt_vchkdrop_ms/uV" values with
- * charger_desc, Charger Manager checks voltage drop after the battery
- * "FULL" event. It checks whether the voltage has dropped more than
- * fullbatt_vchkdrop_uV by calling this function after fullbatt_vchkrop_ms.
- */
-static void fullbatt_vchk(struct work_struct *work)
-{
- struct delayed_work *dwork = to_delayed_work(work);
- struct charger_manager *cm = container_of(dwork,
- struct charger_manager, fullbatt_vchk_work);
- struct charger_desc *desc = cm->desc;
- int batt_uV, err, diff;
-
- /* remove the appointment for fullbatt_vchk */
- cm->fullbatt_vchk_jiffies_at = 0;
-
- if (!desc->fullbatt_vchkdrop_uV || !desc->fullbatt_vchkdrop_ms)
- return;
-
- err = get_batt_uV(cm, &batt_uV);
- if (err) {
- dev_err(cm->dev, "%s: get_batt_uV error(%d)\n", __func__, err);
- return;
- }
-
- diff = desc->fullbatt_uV - batt_uV;
- if (diff < 0)
- return;
-
- dev_info(cm->dev, "VBATT dropped %duV after full-batt\n", diff);
-
- if (diff > desc->fullbatt_vchkdrop_uV) {
- try_charger_restart(cm);
- uevent_notify(cm, "Recharging");
- }
-}
-
-/**
* check_charging_duration - Monitor charging/discharging duration
* @cm: the Charger Manager representing the battery.
*
@@ -569,19 +466,14 @@ static int check_charging_duration(struct charger_manager *cm)
if (duration > desc->charging_max_duration_ms) {
dev_info(cm->dev, "Charging duration exceed %ums\n",
desc->charging_max_duration_ms);
- uevent_notify(cm, "Discharging");
- try_charger_enable(cm, false);
ret = true;
}
- } else if (is_ext_pwr_online(cm) && !cm->charger_enabled) {
+ } else if (cm->battery_status == POWER_SUPPLY_STATUS_NOT_CHARGING) {
duration = curr - cm->charging_end_time;
- if (duration > desc->discharging_max_duration_ms &&
- is_ext_pwr_online(cm)) {
+ if (duration > desc->discharging_max_duration_ms) {
dev_info(cm->dev, "Discharging duration exceed %ums\n",
desc->discharging_max_duration_ms);
- uevent_notify(cm, "Recharging");
- try_charger_enable(cm, true);
ret = true;
}
}
@@ -657,14 +549,53 @@ static int cm_check_thermal_status(struct charger_manager *cm)
}
if (temp > upper_limit)
- ret = CM_EVENT_BATT_OVERHEAT;
+ ret = CM_BATT_OVERHEAT;
else if (temp < lower_limit)
- ret = CM_EVENT_BATT_COLD;
+ ret = CM_BATT_COLD;
+ else
+ ret = CM_BATT_OK;
+
+ cm->emergency_stop = ret;
return ret;
}
/**
+ * cm_get_target_status - Check current status and get next target status.
+ * @cm: the Charger Manager representing the battery.
+ */
+static int cm_get_target_status(struct charger_manager *cm)
+{
+ if (!is_ext_pwr_online(cm))
+ return POWER_SUPPLY_STATUS_DISCHARGING;
+
+ if (cm_check_thermal_status(cm)) {
+ /* Check if discharging duration exeeds limit. */
+ if (check_charging_duration(cm))
+ goto charging_ok;
+ return POWER_SUPPLY_STATUS_NOT_CHARGING;
+ }
+
+ switch (cm->battery_status) {
+ case POWER_SUPPLY_STATUS_CHARGING:
+ /* Check if charging duration exeeds limit. */
+ if (check_charging_duration(cm))
+ return POWER_SUPPLY_STATUS_FULL;
+ fallthrough;
+ case POWER_SUPPLY_STATUS_FULL:
+ if (is_full_charged(cm))
+ return POWER_SUPPLY_STATUS_FULL;
+ fallthrough;
+ default:
+ break;
+ }
+
+charging_ok:
+ /* Charging is allowed. */
+ return POWER_SUPPLY_STATUS_CHARGING;
+}
+
+/**
* _cm_monitor - Monitor the temperature and return true for exceptions.
* @cm: the Charger Manager representing the battery.
*
@@ -673,60 +604,18 @@ static int cm_check_thermal_status(struct charger_manager *cm)
*/
static bool _cm_monitor(struct charger_manager *cm)
{
- int temp_alrt;
-
- temp_alrt = cm_check_thermal_status(cm);
-
- /* It has been stopped already */
- if (temp_alrt && cm->emergency_stop)
- return false;
-
- /*
- * Check temperature whether overheat or cold.
- * If temperature is out of range normal state, stop charging.
- */
- if (temp_alrt) {
- cm->emergency_stop = temp_alrt;
- if (!try_charger_enable(cm, false))
- uevent_notify(cm, default_event_names[temp_alrt]);
-
- /*
- * Check whole charging duration and discharging duration
- * after full-batt.
- */
- } else if (!cm->emergency_stop && check_charging_duration(cm)) {
- dev_dbg(cm->dev,
- "Charging/Discharging duration is out of range\n");
- /*
- * Check dropped voltage of battery. If battery voltage is more
- * dropped than fullbatt_vchkdrop_uV after fully charged state,
- * charger-manager have to recharge battery.
- */
- } else if (!cm->emergency_stop && is_ext_pwr_online(cm) &&
- !cm->charger_enabled) {
- fullbatt_vchk(&cm->fullbatt_vchk_work.work);
+ int target;
- /*
- * Check whether fully charged state to protect overcharge
- * if charger-manager is charging for battery.
- */
- } else if (!cm->emergency_stop && is_full_charged(cm) &&
- cm->charger_enabled) {
- dev_info(cm->dev, "EVENT_HANDLE: Battery Fully Charged\n");
- uevent_notify(cm, default_event_names[CM_EVENT_BATT_FULL]);
+ target = cm_get_target_status(cm);
- try_charger_enable(cm, false);
+ try_charger_enable(cm, (target == POWER_SUPPLY_STATUS_CHARGING));
- fullbatt_vchk(&cm->fullbatt_vchk_work.work);
- } else {
- cm->emergency_stop = 0;
- if (is_ext_pwr_online(cm)) {
- if (!try_charger_enable(cm, true))
- uevent_notify(cm, "CHARGING");
- }
+ if (cm->battery_status != target) {
+ cm->battery_status = target;
+ power_supply_changed(cm->charger_psy);
}
- return true;
+ return (cm->battery_status == POWER_SUPPLY_STATUS_NOT_CHARGING);
}
/**
@@ -819,66 +708,6 @@ static void cm_monitor_poller(struct work_struct *work)
schedule_work(&setup_polling);
}
-/**
- * fullbatt_handler - Event handler for CM_EVENT_BATT_FULL
- * @cm: the Charger Manager representing the battery.
- */
-static void fullbatt_handler(struct charger_manager *cm)
-{
- struct charger_desc *desc = cm->desc;
-
- if (!desc->fullbatt_vchkdrop_uV || !desc->fullbatt_vchkdrop_ms)
- goto out;
-
- if (cm_suspended)
- device_set_wakeup_capable(cm->dev, true);
-
- mod_delayed_work(cm_wq, &cm->fullbatt_vchk_work,
- msecs_to_jiffies(desc->fullbatt_vchkdrop_ms));
- cm->fullbatt_vchk_jiffies_at = jiffies + msecs_to_jiffies(
- desc->fullbatt_vchkdrop_ms);
-
- if (cm->fullbatt_vchk_jiffies_at == 0)
- cm->fullbatt_vchk_jiffies_at = 1;
-
-out:
- dev_info(cm->dev, "EVENT_HANDLE: Battery Fully Charged\n");
- uevent_notify(cm, default_event_names[CM_EVENT_BATT_FULL]);
-}
-
-/**
- * battout_handler - Event handler for CM_EVENT_BATT_OUT
- * @cm: the Charger Manager representing the battery.
- */
-static void battout_handler(struct charger_manager *cm)
-{
- if (cm_suspended)
- device_set_wakeup_capable(cm->dev, true);
-
- if (!is_batt_present(cm)) {
- dev_emerg(cm->dev, "Battery Pulled Out!\n");
- uevent_notify(cm, default_event_names[CM_EVENT_BATT_OUT]);
- } else {
- uevent_notify(cm, "Battery Reinserted?");
- }
-}
-
-/**
- * misc_event_handler - Handler for other events
- * @cm: the Charger Manager representing the battery.
- * @type: the Charger Manager representing the battery.
- */
-static void misc_event_handler(struct charger_manager *cm,
- enum cm_event_types type)
-{
- if (cm_suspended)
- device_set_wakeup_capable(cm->dev, true);
-
- if (is_polling_required(cm) && cm->desc->polling_interval_ms)
- schedule_work(&setup_polling);
- uevent_notify(cm, default_event_names[type]);
-}
-
static int charger_get_property(struct power_supply *psy,
enum power_supply_property psp,
union power_supply_propval *val)
@@ -891,12 +720,7 @@ static int charger_get_property(struct power_supply *psy,
switch (psp) {
case POWER_SUPPLY_PROP_STATUS:
- if (is_charging(cm))
- val->intval = POWER_SUPPLY_STATUS_CHARGING;
- else if (is_ext_pwr_online(cm))
- val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
- else
- val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+ val->intval = cm->battery_status;
break;
case POWER_SUPPLY_PROP_HEALTH:
if (cm->emergency_stop > 0)
@@ -925,7 +749,6 @@ static int charger_get_property(struct power_supply *psy,
POWER_SUPPLY_PROP_CURRENT_NOW, val);
break;
case POWER_SUPPLY_PROP_TEMP:
- case POWER_SUPPLY_PROP_TEMP_AMBIENT:
return cm_get_battery_temperature(cm, &val->intval);
case POWER_SUPPLY_PROP_CAPACITY:
if (!is_batt_present(cm)) {
@@ -981,35 +804,13 @@ static int charger_get_property(struct power_supply *psy,
val->intval = 0;
break;
case POWER_SUPPLY_PROP_CHARGE_FULL:
- if (is_full_charged(cm))
- val->intval = 1;
- else
- val->intval = 0;
- ret = 0;
- break;
case POWER_SUPPLY_PROP_CHARGE_NOW:
- if (is_charging(cm)) {
- fuel_gauge = power_supply_get_by_name(
- cm->desc->psy_fuel_gauge);
- if (!fuel_gauge) {
- ret = -ENODEV;
- break;
- }
-
- ret = power_supply_get_property(fuel_gauge,
- POWER_SUPPLY_PROP_CHARGE_NOW,
- val);
- if (ret) {
- val->intval = 1;
- ret = 0;
- } else {
- /* If CHARGE_NOW is supplied, use it */
- val->intval = (val->intval > 0) ?
- val->intval : 1;
- }
- } else {
- val->intval = 0;
+ fuel_gauge = power_supply_get_by_name(cm->desc->psy_fuel_gauge);
+ if (!fuel_gauge) {
+ ret = -ENODEV;
+ break;
}
+ ret = power_supply_get_property(fuel_gauge, psp, val);
break;
default:
return -EINVAL;
@@ -1028,13 +829,12 @@ static enum power_supply_property default_charger_props[] = {
POWER_SUPPLY_PROP_VOLTAGE_NOW,
POWER_SUPPLY_PROP_CAPACITY,
POWER_SUPPLY_PROP_ONLINE,
- POWER_SUPPLY_PROP_CHARGE_FULL,
/*
* Optional properties are:
+ * POWER_SUPPLY_PROP_CHARGE_FULL,
* POWER_SUPPLY_PROP_CHARGE_NOW,
* POWER_SUPPLY_PROP_CURRENT_NOW,
- * POWER_SUPPLY_PROP_TEMP, and
- * POWER_SUPPLY_PROP_TEMP_AMBIENT,
+ * POWER_SUPPLY_PROP_TEMP,
*/
};
@@ -1069,21 +869,6 @@ static bool cm_setup_timer(void)
mutex_lock(&cm_list_mtx);
list_for_each_entry(cm, &cm_list, entry) {
- unsigned int fbchk_ms = 0;
-
- /* fullbatt_vchk is required. setup timer for that */
- if (cm->fullbatt_vchk_jiffies_at) {
- fbchk_ms = jiffies_to_msecs(cm->fullbatt_vchk_jiffies_at
- - jiffies);
- if (time_is_before_eq_jiffies(
- cm->fullbatt_vchk_jiffies_at) ||
- msecs_to_jiffies(fbchk_ms) < CM_JIFFIES_SMALL) {
- fullbatt_vchk(&cm->fullbatt_vchk_work.work);
- fbchk_ms = 0;
- }
- }
- CM_MIN_VALID(wakeup_ms, fbchk_ms);
-
/* Skip if polling is not required for this CM */
if (!is_polling_required(cm) && !cm->emergency_stop)
continue;
@@ -1145,7 +930,8 @@ static void charger_extcon_work(struct work_struct *work)
cable->min_uA, cable->max_uA);
}
- try_charger_enable(cable->cm, cable->attached);
+ cancel_delayed_work(&cm_monitor_work);
+ queue_delayed_work(cm_wq, &cm_monitor_work, 0);
}
/**
@@ -1169,15 +955,6 @@ static int charger_extcon_notifier(struct notifier_block *self,
cable->attached = event;
/*
- * Setup monitoring to check battery state
- * when charger cable is attached.
- */
- if (cable->attached && is_polling_required(cable->cm)) {
- cancel_work_sync(&setup_polling);
- schedule_work(&setup_polling);
- }
-
- /*
* Setup work for controlling charger(regulator)
* according to charger cable.
*/
@@ -1196,7 +973,8 @@ static int charger_extcon_notifier(struct notifier_block *self,
static int charger_extcon_init(struct charger_manager *cm,
struct charger_cable *cable)
{
- int ret;
+ int ret, i;
+ u64 extcon_type = EXTCON_NONE;
/*
* Charger manager use Extcon framework to identify
@@ -1205,14 +983,39 @@ static int charger_extcon_init(struct charger_manager *cm,
*/
INIT_WORK(&cable->wq, charger_extcon_work);
cable->nb.notifier_call = charger_extcon_notifier;
- ret = extcon_register_interest(&cable->extcon_dev,
- cable->extcon_name, cable->name, &cable->nb);
+
+ cable->extcon_dev = extcon_get_extcon_dev(cable->extcon_name);
+ if (IS_ERR_OR_NULL(cable->extcon_dev)) {
+ pr_err("Cannot find extcon_dev for %s (cable: %s)\n",
+ cable->extcon_name, cable->name);
+ if (cable->extcon_dev == NULL)
+ return -EPROBE_DEFER;
+ else
+ return PTR_ERR(cable->extcon_dev);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(extcon_mapping); i++) {
+ if (!strcmp(cable->name, extcon_mapping[i].name)) {
+ extcon_type = extcon_mapping[i].extcon_type;
+ break;
+ }
+ }
+ if (extcon_type == EXTCON_NONE) {
+ pr_err("Cannot find cable for type %s", cable->name);
+ return -EINVAL;
+ }
+
+ cable->extcon_type = extcon_type;
+
+ ret = devm_extcon_register_notifier(cm->dev, cable->extcon_dev,
+ cable->extcon_type, &cable->nb);
if (ret < 0) {
- pr_info("Cannot register extcon_dev for %s(cable: %s)\n",
+ pr_err("Cannot register extcon_dev for %s (cable: %s)\n",
cable->extcon_name, cable->name);
+ return ret;
}
- return ret;
+ return 0;
}
/**
@@ -1229,6 +1032,7 @@ static int charger_manager_register_extcon(struct charger_manager *cm)
{
struct charger_desc *desc = cm->desc;
struct charger_regulator *charger;
+ unsigned long event;
int ret;
int i;
int j;
@@ -1256,6 +1060,11 @@ static int charger_manager_register_extcon(struct charger_manager *cm)
}
cable->charger = charger;
cable->cm = cm;
+
+ event = extcon_get_state(cable->extcon_dev,
+ cable->extcon_type);
+ charger_extcon_notifier(&cable->nb,
+ event, NULL);
}
}
@@ -1447,7 +1256,7 @@ static int cm_init_thermal_data(struct charger_manager *cm,
return PTR_ERR(cm->tzd_batt);
/* Use external thermometer */
- properties[*num_properties] = POWER_SUPPLY_PROP_TEMP_AMBIENT;
+ properties[*num_properties] = POWER_SUPPLY_PROP_TEMP;
(*num_properties)++;
cm->desc->measure_battery_temp = true;
ret = 0;
@@ -1491,8 +1300,6 @@ static struct charger_desc *of_cm_parse_desc(struct device *dev)
of_property_read_u32(np, "cm-poll-interval",
&desc->polling_interval_ms);
- of_property_read_u32(np, "cm-fullbatt-vchkdrop-ms",
- &desc->fullbatt_vchkdrop_ms);
of_property_read_u32(np, "cm-fullbatt-vchkdrop-volt",
&desc->fullbatt_vchkdrop_uV);
of_property_read_u32(np, "cm-fullbatt-voltage", &desc->fullbatt_uV);
@@ -1504,8 +1311,8 @@ static struct charger_desc *of_cm_parse_desc(struct device *dev)
desc->battery_present = battery_stat;
/* chargers */
- of_property_read_u32(np, "cm-num-chargers", &num_chgs);
- if (num_chgs) {
+ num_chgs = of_property_count_strings(np, "cm-chargers");
+ if (num_chgs > 0) {
int i;
/* Allocate empty bin at the tail of array */
@@ -1618,7 +1425,6 @@ static int charger_manager_probe(struct platform_device *pdev)
struct charger_desc *desc = cm_get_drv_data(pdev);
struct charger_manager *cm;
int ret, i = 0;
- int j = 0;
union power_supply_propval val;
struct power_supply *fuel_gauge;
enum power_supply_property *properties;
@@ -1654,9 +1460,8 @@ static int charger_manager_probe(struct platform_device *pdev)
if (desc->fullbatt_uV == 0) {
dev_info(&pdev->dev, "Ignoring full-battery voltage threshold as it is not supplied\n");
}
- if (!desc->fullbatt_vchkdrop_ms || !desc->fullbatt_vchkdrop_uV) {
+ if (!desc->fullbatt_vchkdrop_uV) {
dev_info(&pdev->dev, "Disabling full-battery voltage drop checking mechanism as it is not supplied\n");
- desc->fullbatt_vchkdrop_ms = 0;
desc->fullbatt_vchkdrop_uV = 0;
}
if (desc->fullbatt_soc == 0) {
@@ -1739,6 +1544,12 @@ static int charger_manager_probe(struct platform_device *pdev)
return -ENODEV;
}
if (!power_supply_get_property(fuel_gauge,
+ POWER_SUPPLY_PROP_CHARGE_FULL, &val)) {
+ properties[num_properties] =
+ POWER_SUPPLY_PROP_CHARGE_FULL;
+ num_properties++;
+ }
+ if (!power_supply_get_property(fuel_gauge,
POWER_SUPPLY_PROP_CHARGE_NOW, &val)) {
properties[num_properties] =
POWER_SUPPLY_PROP_CHARGE_NOW;
@@ -1762,8 +1573,6 @@ static int charger_manager_probe(struct platform_device *pdev)
cm->charger_psy_desc.properties = properties;
cm->charger_psy_desc.num_properties = num_properties;
- INIT_DELAYED_WORK(&cm->fullbatt_vchk_work, fullbatt_vchk);
-
/* Register sysfs entry for charger(regulator) */
ret = charger_manager_prepare_sysfs(cm);
if (ret < 0) {
@@ -1813,19 +1622,8 @@ static int charger_manager_probe(struct platform_device *pdev)
return 0;
err_reg_extcon:
- for (i = 0; i < desc->num_charger_regulators; i++) {
- struct charger_regulator *charger;
-
- charger = &desc->charger_regulators[i];
- for (j = 0; j < charger->num_cables; j++) {
- struct charger_cable *cable = &charger->cables[j];
- /* Remove notifier block if only edev exists */
- if (cable->extcon_dev.edev)
- extcon_unregister_interest(&cable->extcon_dev);
- }
-
+ for (i = 0; i < desc->num_charger_regulators; i++)
regulator_put(desc->charger_regulators[i].consumer);
- }
power_supply_unregister(cm->charger_psy);
@@ -1837,7 +1635,6 @@ static int charger_manager_remove(struct platform_device *pdev)
struct charger_manager *cm = platform_get_drvdata(pdev);
struct charger_desc *desc = cm->desc;
int i = 0;
- int j = 0;
/* Remove from the list */
mutex_lock(&cm_list_mtx);
@@ -1847,15 +1644,6 @@ static int charger_manager_remove(struct platform_device *pdev)
cancel_work_sync(&setup_polling);
cancel_delayed_work_sync(&cm_monitor_work);
- for (i = 0 ; i < desc->num_charger_regulators ; i++) {
- struct charger_regulator *charger
- = &desc->charger_regulators[i];
- for (j = 0 ; j < charger->num_cables ; j++) {
- struct charger_cable *cable = &charger->cables[j];
- extcon_unregister_interest(&cable->extcon_dev);
- }
- }
-
for (i = 0 ; i < desc->num_charger_regulators ; i++)
regulator_put(desc->charger_regulators[i].consumer);
@@ -1903,8 +1691,6 @@ static bool cm_need_to_awake(void)
static int cm_suspend_prepare(struct device *dev)
{
- struct charger_manager *cm = dev_get_drvdata(dev);
-
if (cm_need_to_awake())
return -EBUSY;
@@ -1916,7 +1702,6 @@ static int cm_suspend_prepare(struct device *dev)
if (cm_timer_set) {
cancel_work_sync(&setup_polling);
cancel_delayed_work_sync(&cm_monitor_work);
- cancel_delayed_work(&cm->fullbatt_vchk_work);
}
return 0;
@@ -1941,31 +1726,6 @@ static void cm_suspend_complete(struct device *dev)
_cm_monitor(cm);
- /* Re-enqueue delayed work (fullbatt_vchk_work) */
- if (cm->fullbatt_vchk_jiffies_at) {
- unsigned long delay = 0;
- unsigned long now = jiffies + CM_JIFFIES_SMALL;
-
- if (time_after_eq(now, cm->fullbatt_vchk_jiffies_at)) {
- delay = (unsigned long)((long)now
- - (long)(cm->fullbatt_vchk_jiffies_at));
- delay = jiffies_to_msecs(delay);
- } else {
- delay = 0;
- }
-
- /*
- * Account for cm_suspend_duration_ms with assuming that
- * timer stops in suspend.
- */
- if (delay > cm_suspend_duration_ms)
- delay -= cm_suspend_duration_ms;
- else
- delay = 0;
-
- queue_delayed_work(cm_wq, &cm->fullbatt_vchk_work,
- msecs_to_jiffies(delay));
- }
device_set_wakeup_capable(cm->dev, false);
}
@@ -2007,56 +1767,6 @@ static void __exit charger_manager_cleanup(void)
}
module_exit(charger_manager_cleanup);
-/**
- * cm_notify_event - charger driver notify Charger Manager of charger event
- * @psy: pointer to instance of charger's power_supply
- * @type: type of charger event
- * @msg: optional message passed to uevent_notify function
- */
-void cm_notify_event(struct power_supply *psy, enum cm_event_types type,
- char *msg)
-{
- struct charger_manager *cm;
- bool found_power_supply = false;
-
- if (psy == NULL)
- return;
-
- mutex_lock(&cm_list_mtx);
- list_for_each_entry(cm, &cm_list, entry) {
- if (match_string(cm->desc->psy_charger_stat, -1,
- psy->desc->name) >= 0) {
- found_power_supply = true;
- break;
- }
- }
- mutex_unlock(&cm_list_mtx);
-
- if (!found_power_supply)
- return;
-
- switch (type) {
- case CM_EVENT_BATT_FULL:
- fullbatt_handler(cm);
- break;
- case CM_EVENT_BATT_OUT:
- battout_handler(cm);
- break;
- case CM_EVENT_BATT_IN:
- case CM_EVENT_EXT_PWR_IN_OUT ... CM_EVENT_CHG_START_STOP:
- misc_event_handler(cm, type);
- break;
- case CM_EVENT_UNKNOWN:
- case CM_EVENT_OTHERS:
- uevent_notify(cm, msg ? msg : default_event_names[type]);
- break;
- default:
- dev_err(cm->dev, "%s: type not specified\n", __func__);
- break;
- }
-}
-EXPORT_SYMBOL_GPL(cm_notify_event);
-
MODULE_AUTHOR("MyungJoo Ham <myungjoo.ham@samsung.com>");
MODULE_DESCRIPTION("Charger Manager");
MODULE_LICENSE("GPL");
diff --git a/drivers/power/supply/cpcap-battery.c b/drivers/power/supply/cpcap-battery.c
index 90eba364664b..295611b3b15e 100644
--- a/drivers/power/supply/cpcap-battery.c
+++ b/drivers/power/supply/cpcap-battery.c
@@ -747,11 +747,8 @@ static int cpcap_battery_init_iio(struct cpcap_battery_ddata *ddata)
return 0;
out_err:
- if (error != -EPROBE_DEFER)
- dev_err(ddata->dev, "could not initialize VBUS or ID IIO: %i\n",
- error);
-
- return error;
+ return dev_err_probe(ddata->dev, error,
+ "could not initialize VBUS or ID IIO\n");
}
/* Calibrate coulomb counter */
diff --git a/drivers/power/supply/ds2780_battery.c b/drivers/power/supply/ds2780_battery.c
index db3a25404c9f..dd57a472e878 100644
--- a/drivers/power/supply/ds2780_battery.c
+++ b/drivers/power/supply/ds2780_battery.c
@@ -160,7 +160,7 @@ static int ds2780_get_voltage(struct ds2780_device_info *dev_info,
/*
* The voltage value is located in 10 bits across the voltage MSB
- * and LSB registers in two's compliment form
+ * and LSB registers in two's complement form
* Sign bit of the voltage value is in bit 7 of the voltage MSB register
* Bits 9 - 3 of the voltage value are in bits 6 - 0 of the
* voltage MSB register
@@ -188,7 +188,7 @@ static int ds2780_get_temperature(struct ds2780_device_info *dev_info,
/*
* The temperature value is located in 10 bits across the temperature
- * MSB and LSB registers in two's compliment form
+ * MSB and LSB registers in two's complement form
* Sign bit of the temperature value is in bit 7 of the temperature
* MSB register
* Bits 9 - 3 of the temperature value are in bits 6 - 0 of the
@@ -241,7 +241,7 @@ static int ds2780_get_current(struct ds2780_device_info *dev_info,
/*
* The current value is located in 16 bits across the current MSB
- * and LSB registers in two's compliment form
+ * and LSB registers in two's complement form
* Sign bit of the current value is in bit 7 of the current MSB register
* Bits 14 - 8 of the current value are in bits 6 - 0 of the current
* MSB register
diff --git a/drivers/power/supply/ds2781_battery.c b/drivers/power/supply/ds2781_battery.c
index 130cbdfc14eb..3df3c820b38c 100644
--- a/drivers/power/supply/ds2781_battery.c
+++ b/drivers/power/supply/ds2781_battery.c
@@ -168,7 +168,7 @@ static int ds2781_get_voltage(struct ds2781_device_info *dev_info,
return ret;
/*
* The voltage value is located in 10 bits across the voltage MSB
- * and LSB registers in two's compliment form
+ * and LSB registers in two's complement form
* Sign bit of the voltage value is in bit 7 of the voltage MSB register
* Bits 9 - 3 of the voltage value are in bits 6 - 0 of the
* voltage MSB register
@@ -197,7 +197,7 @@ static int ds2781_get_temperature(struct ds2781_device_info *dev_info,
return ret;
/*
* The temperature value is located in 10 bits across the temperature
- * MSB and LSB registers in two's compliment form
+ * MSB and LSB registers in two's complement form
* Sign bit of the temperature value is in bit 7 of the temperature
* MSB register
* Bits 9 - 3 of the temperature value are in bits 6 - 0 of the
@@ -242,7 +242,7 @@ static int ds2781_get_current(struct ds2781_device_info *dev_info,
/*
* The current value is located in 16 bits across the current MSB
- * and LSB registers in two's compliment form
+ * and LSB registers in two's complement form
* Sign bit of the current value is in bit 7 of the current MSB register
* Bits 14 - 8 of the current value are in bits 6 - 0 of the current
* MSB register
diff --git a/drivers/power/supply/goldfish_battery.c b/drivers/power/supply/goldfish_battery.c
index c2644a9fe80f..bf1754355c9f 100644
--- a/drivers/power/supply/goldfish_battery.c
+++ b/drivers/power/supply/goldfish_battery.c
@@ -266,11 +266,13 @@ static const struct of_device_id goldfish_battery_of_match[] = {
};
MODULE_DEVICE_TABLE(of, goldfish_battery_of_match);
+#ifdef CONFIG_ACPI
static const struct acpi_device_id goldfish_battery_acpi_match[] = {
{ "GFSH0001", 0 },
{ },
};
MODULE_DEVICE_TABLE(acpi, goldfish_battery_acpi_match);
+#endif
static struct platform_driver goldfish_battery_device = {
.probe = goldfish_battery_probe,
diff --git a/drivers/power/supply/gpio-charger.c b/drivers/power/supply/gpio-charger.c
index 875735d50716..68212b39785b 100644
--- a/drivers/power/supply/gpio-charger.c
+++ b/drivers/power/supply/gpio-charger.c
@@ -5,7 +5,6 @@
*/
#include <linux/device.h>
-#include <linux/gpio.h> /* For legacy platform data */
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
@@ -18,7 +17,13 @@
#include <linux/power/gpio-charger.h>
+struct gpio_mapping {
+ u32 limit_ua;
+ u32 gpiodata;
+} __packed;
+
struct gpio_charger {
+ struct device *dev;
unsigned int irq;
unsigned int charge_status_irq;
bool wakeup_enabled;
@@ -27,6 +32,11 @@ struct gpio_charger {
struct power_supply_desc charger_desc;
struct gpio_desc *gpiod;
struct gpio_desc *charge_status;
+
+ struct gpio_descs *current_limit_gpios;
+ struct gpio_mapping *current_limit_map;
+ u32 current_limit_map_size;
+ u32 charge_current_limit;
};
static irqreturn_t gpio_charger_irq(int irq, void *devid)
@@ -43,6 +53,35 @@ static inline struct gpio_charger *psy_to_gpio_charger(struct power_supply *psy)
return power_supply_get_drvdata(psy);
}
+static int set_charge_current_limit(struct gpio_charger *gpio_charger, int val)
+{
+ struct gpio_mapping mapping;
+ int ndescs = gpio_charger->current_limit_gpios->ndescs;
+ struct gpio_desc **gpios = gpio_charger->current_limit_gpios->desc;
+ int i;
+
+ if (!gpio_charger->current_limit_map_size)
+ return -EINVAL;
+
+ for (i = 0; i < gpio_charger->current_limit_map_size; i++) {
+ if (gpio_charger->current_limit_map[i].limit_ua <= val)
+ break;
+ }
+ mapping = gpio_charger->current_limit_map[i];
+
+ for (i = 0; i < ndescs; i++) {
+ bool val = (mapping.gpiodata >> i) & 1;
+ gpiod_set_value_cansleep(gpios[ndescs-i-1], val);
+ }
+
+ gpio_charger->charge_current_limit = mapping.limit_ua;
+
+ dev_dbg(gpio_charger->dev, "set charge current limit to %d (requested: %d)\n",
+ gpio_charger->charge_current_limit, val);
+
+ return 0;
+}
+
static int gpio_charger_get_property(struct power_supply *psy,
enum power_supply_property psp, union power_supply_propval *val)
{
@@ -58,6 +97,24 @@ static int gpio_charger_get_property(struct power_supply *psy,
else
val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
break;
+ case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX:
+ val->intval = gpio_charger->charge_current_limit;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int gpio_charger_set_property(struct power_supply *psy,
+ enum power_supply_property psp, const union power_supply_propval *val)
+{
+ struct gpio_charger *gpio_charger = psy_to_gpio_charger(psy);
+
+ switch (psp) {
+ case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX:
+ return set_charge_current_limit(gpio_charger, val->intval);
default:
return -EINVAL;
}
@@ -65,6 +122,19 @@ static int gpio_charger_get_property(struct power_supply *psy,
return 0;
}
+static int gpio_charger_property_is_writeable(struct power_supply *psy,
+ enum power_supply_property psp)
+{
+ switch (psp) {
+ case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX:
+ return 1;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
static enum power_supply_type gpio_charger_get_type(struct device *dev)
{
const char *chargetype;
@@ -112,6 +182,61 @@ static int gpio_charger_get_irq(struct device *dev, void *dev_id,
return irq;
}
+static int init_charge_current_limit(struct device *dev,
+ struct gpio_charger *gpio_charger)
+{
+ int i, len;
+ u32 cur_limit = U32_MAX;
+
+ gpio_charger->current_limit_gpios = devm_gpiod_get_array_optional(dev,
+ "charge-current-limit", GPIOD_OUT_LOW);
+ if (IS_ERR(gpio_charger->current_limit_gpios)) {
+ dev_err(dev, "error getting current-limit GPIOs\n");
+ return PTR_ERR(gpio_charger->current_limit_gpios);
+ }
+
+ if (!gpio_charger->current_limit_gpios)
+ return 0;
+
+ len = device_property_read_u32_array(dev, "charge-current-limit-mapping",
+ NULL, 0);
+ if (len < 0)
+ return len;
+
+ if (len == 0 || len % 2) {
+ dev_err(dev, "invalid charge-current-limit-mapping length\n");
+ return -EINVAL;
+ }
+
+ gpio_charger->current_limit_map = devm_kmalloc_array(dev,
+ len / 2, sizeof(*gpio_charger->current_limit_map), GFP_KERNEL);
+ if (!gpio_charger->current_limit_map)
+ return -ENOMEM;
+
+ gpio_charger->current_limit_map_size = len / 2;
+
+ len = device_property_read_u32_array(dev, "charge-current-limit-mapping",
+ (u32*) gpio_charger->current_limit_map, len);
+ if (len < 0)
+ return len;
+
+ for (i=0; i < gpio_charger->current_limit_map_size; i++) {
+ if (gpio_charger->current_limit_map[i].limit_ua > cur_limit) {
+ dev_err(dev, "charge-current-limit-mapping not sorted by current in descending order\n");
+ return -EINVAL;
+ }
+
+ cur_limit = gpio_charger->current_limit_map[i].limit_ua;
+ }
+
+ /* default to smallest current limitation for safety reasons */
+ len = gpio_charger->current_limit_map_size - 1;
+ set_charge_current_limit(gpio_charger,
+ gpio_charger->current_limit_map[len].limit_ua);
+
+ return 0;
+}
+
/*
* The entries will be overwritten by driver's probe routine depending
* on the available features. This list ensures, that the array is big
@@ -120,6 +245,7 @@ static int gpio_charger_get_irq(struct device *dev, void *dev_id,
static enum power_supply_property gpio_charger_properties[] = {
POWER_SUPPLY_PROP_ONLINE,
POWER_SUPPLY_PROP_STATUS,
+ POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX,
};
static int gpio_charger_probe(struct platform_device *pdev)
@@ -131,7 +257,6 @@ static int gpio_charger_probe(struct platform_device *pdev)
struct power_supply_desc *charger_desc;
struct gpio_desc *charge_status;
int charge_status_irq;
- unsigned long flags;
int ret;
int num_props = 0;
@@ -143,40 +268,17 @@ static int gpio_charger_probe(struct platform_device *pdev)
gpio_charger = devm_kzalloc(dev, sizeof(*gpio_charger), GFP_KERNEL);
if (!gpio_charger)
return -ENOMEM;
+ gpio_charger->dev = dev;
/*
* This will fetch a GPIO descriptor from device tree, ACPI or
* boardfile descriptor tables. It's good to try this first.
*/
gpio_charger->gpiod = devm_gpiod_get_optional(dev, NULL, GPIOD_IN);
-
- /*
- * Fallback to legacy platform data method, if no GPIO is specified
- * using boardfile descriptor tables.
- */
- if (!gpio_charger->gpiod && pdata) {
- /* Non-DT: use legacy GPIO numbers */
- if (!gpio_is_valid(pdata->gpio)) {
- dev_err(dev, "Invalid gpio pin in pdata\n");
- return -EINVAL;
- }
- flags = GPIOF_IN;
- if (pdata->gpio_active_low)
- flags |= GPIOF_ACTIVE_LOW;
- ret = devm_gpio_request_one(dev, pdata->gpio, flags,
- dev_name(dev));
- if (ret) {
- dev_err(dev, "Failed to request gpio pin: %d\n", ret);
- return ret;
- }
- /* Then convert this to gpiod for now */
- gpio_charger->gpiod = gpio_to_desc(pdata->gpio);
- } else if (IS_ERR(gpio_charger->gpiod)) {
+ if (IS_ERR(gpio_charger->gpiod)) {
/* Just try again if this happens */
- if (PTR_ERR(gpio_charger->gpiod) == -EPROBE_DEFER)
- return -EPROBE_DEFER;
- dev_err(dev, "error getting GPIO descriptor\n");
- return PTR_ERR(gpio_charger->gpiod);
+ return dev_err_probe(dev, PTR_ERR(gpio_charger->gpiod),
+ "error getting GPIO descriptor\n");
}
if (gpio_charger->gpiod) {
@@ -193,10 +295,22 @@ static int gpio_charger_probe(struct platform_device *pdev)
num_props++;
}
+ ret = init_charge_current_limit(dev, gpio_charger);
+ if (ret < 0)
+ return ret;
+ if (gpio_charger->current_limit_map) {
+ gpio_charger_properties[num_props] =
+ POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX;
+ num_props++;
+ }
+
charger_desc = &gpio_charger->charger_desc;
charger_desc->properties = gpio_charger_properties;
charger_desc->num_properties = num_props;
charger_desc->get_property = gpio_charger_get_property;
+ charger_desc->set_property = gpio_charger_set_property;
+ charger_desc->property_is_writeable =
+ gpio_charger_property_is_writeable;
psy_cfg.of_node = dev->of_node;
psy_cfg.drv_data = gpio_charger;
diff --git a/drivers/power/supply/ingenic-battery.c b/drivers/power/supply/ingenic-battery.c
index dd3d93dfe3eb..32dc77fd9a95 100644
--- a/drivers/power/supply/ingenic-battery.c
+++ b/drivers/power/supply/ingenic-battery.c
@@ -147,11 +147,9 @@ static int ingenic_battery_probe(struct platform_device *pdev)
psy_cfg.of_node = dev->of_node;
bat->battery = devm_power_supply_register(dev, desc, &psy_cfg);
- if (IS_ERR(bat->battery)) {
- if (PTR_ERR(bat->battery) != -EPROBE_DEFER)
- dev_err(dev, "Unable to register battery\n");
- return PTR_ERR(bat->battery);
- }
+ if (IS_ERR(bat->battery))
+ return dev_err_probe(dev, PTR_ERR(bat->battery),
+ "Unable to register battery\n");
ret = power_supply_get_battery_info(bat->battery, &bat->info);
if (ret) {
diff --git a/drivers/power/supply/lego_ev3_battery.c b/drivers/power/supply/lego_ev3_battery.c
index 1ae3710909b7..ccb00be38e2c 100644
--- a/drivers/power/supply/lego_ev3_battery.c
+++ b/drivers/power/supply/lego_ev3_battery.c
@@ -166,27 +166,21 @@ static int lego_ev3_battery_probe(struct platform_device *pdev)
batt->iio_v = devm_iio_channel_get(dev, "voltage");
err = PTR_ERR_OR_ZERO(batt->iio_v);
- if (err) {
- if (err != -EPROBE_DEFER)
- dev_err(dev, "Failed to get voltage iio channel\n");
- return err;
- }
+ if (err)
+ return dev_err_probe(dev, err,
+ "Failed to get voltage iio channel\n");
batt->iio_i = devm_iio_channel_get(dev, "current");
err = PTR_ERR_OR_ZERO(batt->iio_i);
- if (err) {
- if (err != -EPROBE_DEFER)
- dev_err(dev, "Failed to get current iio channel\n");
- return err;
- }
+ if (err)
+ return dev_err_probe(dev, err,
+ "Failed to get current iio channel\n");
batt->rechargeable_gpio = devm_gpiod_get(dev, "rechargeable", GPIOD_IN);
err = PTR_ERR_OR_ZERO(batt->rechargeable_gpio);
- if (err) {
- if (err != -EPROBE_DEFER)
- dev_err(dev, "Failed to get rechargeable gpio\n");
- return err;
- }
+ if (err)
+ return dev_err_probe(dev, err,
+ "Failed to get rechargeable gpio\n");
/*
* The rechargeable battery indication switch cannot be changed without
diff --git a/drivers/power/supply/ltc2941-battery-gauge.c b/drivers/power/supply/ltc2941-battery-gauge.c
index 30a9014b2f95..10cd617516ec 100644
--- a/drivers/power/supply/ltc2941-battery-gauge.c
+++ b/drivers/power/supply/ltc2941-battery-gauge.c
@@ -473,7 +473,8 @@ static int ltc294x_i2c_probe(struct i2c_client *client,
np = of_node_get(client->dev.of_node);
- info->id = (enum ltc294x_id)of_device_get_match_data(&client->dev);
+ info->id = (enum ltc294x_id) (uintptr_t) of_device_get_match_data(
+ &client->dev);
info->supply_desc.name = np->name;
/* r_sense can be negative, when sense+ is connected to the battery
diff --git a/drivers/power/supply/max17040_battery.c b/drivers/power/supply/max17040_battery.c
index 6cb31b9a958d..d956c67d5155 100644
--- a/drivers/power/supply/max17040_battery.c
+++ b/drivers/power/supply/max17040_battery.c
@@ -15,196 +15,289 @@
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/power_supply.h>
+#include <linux/of_device.h>
#include <linux/max17040_battery.h>
+#include <linux/regmap.h>
#include <linux/slab.h>
#define MAX17040_VCELL 0x02
#define MAX17040_SOC 0x04
#define MAX17040_MODE 0x06
#define MAX17040_VER 0x08
-#define MAX17040_RCOMP 0x0C
+#define MAX17040_CONFIG 0x0C
+#define MAX17040_STATUS 0x1A
#define MAX17040_CMD 0xFE
#define MAX17040_DELAY 1000
#define MAX17040_BATTERY_FULL 95
+#define MAX17040_RCOMP_DEFAULT 0x9700
-#define MAX17040_ATHD_MASK 0xFFC0
+#define MAX17040_ATHD_MASK 0x3f
+#define MAX17040_ALSC_MASK 0x40
#define MAX17040_ATHD_DEFAULT_POWER_UP 4
+#define MAX17040_STATUS_HD_MASK 0x1000
+#define MAX17040_STATUS_SC_MASK 0x2000
+#define MAX17040_CFG_RCOMP_MASK 0xff00
+
+enum chip_id {
+ ID_MAX17040,
+ ID_MAX17041,
+ ID_MAX17043,
+ ID_MAX17044,
+ ID_MAX17048,
+ ID_MAX17049,
+ ID_MAX17058,
+ ID_MAX17059,
+};
+
+/* values that differ by chip_id */
+struct chip_data {
+ u16 reset_val;
+ u16 vcell_shift;
+ u16 vcell_mul;
+ u16 vcell_div;
+ u8 has_low_soc_alert;
+ u8 rcomp_bytes;
+ u8 has_soc_alert;
+};
+
+static struct chip_data max17040_family[] = {
+ [ID_MAX17040] = {
+ .reset_val = 0x0054,
+ .vcell_shift = 4,
+ .vcell_mul = 1250,
+ .vcell_div = 1,
+ .has_low_soc_alert = 0,
+ .rcomp_bytes = 2,
+ .has_soc_alert = 0,
+ },
+ [ID_MAX17041] = {
+ .reset_val = 0x0054,
+ .vcell_shift = 4,
+ .vcell_mul = 2500,
+ .vcell_div = 1,
+ .has_low_soc_alert = 0,
+ .rcomp_bytes = 2,
+ .has_soc_alert = 0,
+ },
+ [ID_MAX17043] = {
+ .reset_val = 0x0054,
+ .vcell_shift = 4,
+ .vcell_mul = 1250,
+ .vcell_div = 1,
+ .has_low_soc_alert = 1,
+ .rcomp_bytes = 1,
+ .has_soc_alert = 0,
+ },
+ [ID_MAX17044] = {
+ .reset_val = 0x0054,
+ .vcell_shift = 4,
+ .vcell_mul = 2500,
+ .vcell_div = 1,
+ .has_low_soc_alert = 1,
+ .rcomp_bytes = 1,
+ .has_soc_alert = 0,
+ },
+ [ID_MAX17048] = {
+ .reset_val = 0x5400,
+ .vcell_shift = 0,
+ .vcell_mul = 625,
+ .vcell_div = 8,
+ .has_low_soc_alert = 1,
+ .rcomp_bytes = 1,
+ .has_soc_alert = 1,
+ },
+ [ID_MAX17049] = {
+ .reset_val = 0x5400,
+ .vcell_shift = 0,
+ .vcell_mul = 625,
+ .vcell_div = 4,
+ .has_low_soc_alert = 1,
+ .rcomp_bytes = 1,
+ .has_soc_alert = 1,
+ },
+ [ID_MAX17058] = {
+ .reset_val = 0x5400,
+ .vcell_shift = 0,
+ .vcell_mul = 625,
+ .vcell_div = 8,
+ .has_low_soc_alert = 1,
+ .rcomp_bytes = 1,
+ .has_soc_alert = 0,
+ },
+ [ID_MAX17059] = {
+ .reset_val = 0x5400,
+ .vcell_shift = 0,
+ .vcell_mul = 625,
+ .vcell_div = 4,
+ .has_low_soc_alert = 1,
+ .rcomp_bytes = 1,
+ .has_soc_alert = 0,
+ },
+};
struct max17040_chip {
struct i2c_client *client;
+ struct regmap *regmap;
struct delayed_work work;
struct power_supply *battery;
struct max17040_platform_data *pdata;
+ struct chip_data data;
- /* State Of Connect */
- int online;
- /* battery voltage */
- int vcell;
/* battery capacity */
int soc;
/* State Of Charge */
int status;
/* Low alert threshold from 32% to 1% of the State of Charge */
u32 low_soc_alert;
+ /* some devices return twice the capacity */
+ bool quirk_double_soc;
+ /* higher 8 bits for 17043+, 16 bits for 17040,41 */
+ u16 rcomp;
};
-static int max17040_get_property(struct power_supply *psy,
- enum power_supply_property psp,
- union power_supply_propval *val)
+static int max17040_reset(struct max17040_chip *chip)
{
- struct max17040_chip *chip = power_supply_get_drvdata(psy);
-
- switch (psp) {
- case POWER_SUPPLY_PROP_STATUS:
- val->intval = chip->status;
- break;
- case POWER_SUPPLY_PROP_ONLINE:
- val->intval = chip->online;
- break;
- case POWER_SUPPLY_PROP_VOLTAGE_NOW:
- val->intval = chip->vcell;
- break;
- case POWER_SUPPLY_PROP_CAPACITY:
- val->intval = chip->soc;
- break;
- case POWER_SUPPLY_PROP_CAPACITY_ALERT_MIN:
- val->intval = chip->low_soc_alert;
- break;
- default:
- return -EINVAL;
- }
- return 0;
+ return regmap_write(chip->regmap, MAX17040_CMD, chip->data.reset_val);
}
-static int max17040_write_reg(struct i2c_client *client, int reg, u16 value)
+static int max17040_set_low_soc_alert(struct max17040_chip *chip, u32 level)
{
- int ret;
-
- ret = i2c_smbus_write_word_swapped(client, reg, value);
-
- if (ret < 0)
- dev_err(&client->dev, "%s: err %d\n", __func__, ret);
-
- return ret;
+ level = 32 - level * (chip->quirk_double_soc ? 2 : 1);
+ return regmap_update_bits(chip->regmap, MAX17040_CONFIG,
+ MAX17040_ATHD_MASK, level);
}
-static int max17040_read_reg(struct i2c_client *client, int reg)
+static int max17040_set_soc_alert(struct max17040_chip *chip, bool enable)
{
- int ret;
-
- ret = i2c_smbus_read_word_swapped(client, reg);
-
- if (ret < 0)
- dev_err(&client->dev, "%s: err %d\n", __func__, ret);
-
- return ret;
+ return regmap_update_bits(chip->regmap, MAX17040_CONFIG,
+ MAX17040_ALSC_MASK, enable ? MAX17040_ALSC_MASK : 0);
}
-static void max17040_reset(struct i2c_client *client)
+static int max17040_set_rcomp(struct max17040_chip *chip, u16 rcomp)
{
- max17040_write_reg(client, MAX17040_CMD, 0x0054);
+ u16 mask = chip->data.rcomp_bytes == 2 ?
+ 0xffff : MAX17040_CFG_RCOMP_MASK;
+
+ return regmap_update_bits(chip->regmap, MAX17040_CONFIG, mask, rcomp);
}
-static int max17040_set_low_soc_alert(struct i2c_client *client, u32 level)
+static int max17040_raw_vcell_to_uvolts(struct max17040_chip *chip, u16 vcell)
{
- int ret;
- u16 data;
+ struct chip_data *d = &chip->data;
- level = 32 - level;
- data = max17040_read_reg(client, MAX17040_RCOMP);
- /* clear the alrt bit and set LSb 5 bits */
- data &= MAX17040_ATHD_MASK;
- data |= level;
- ret = max17040_write_reg(client, MAX17040_RCOMP, data);
-
- return ret;
+ return (vcell >> d->vcell_shift) * d->vcell_mul / d->vcell_div;
}
-static void max17040_get_vcell(struct i2c_client *client)
+
+static int max17040_get_vcell(struct max17040_chip *chip)
{
- struct max17040_chip *chip = i2c_get_clientdata(client);
- u16 vcell;
+ u32 vcell;
- vcell = max17040_read_reg(client, MAX17040_VCELL);
+ regmap_read(chip->regmap, MAX17040_VCELL, &vcell);
- chip->vcell = (vcell >> 4) * 1250;
+ return max17040_raw_vcell_to_uvolts(chip, vcell);
}
-static void max17040_get_soc(struct i2c_client *client)
+static int max17040_get_soc(struct max17040_chip *chip)
{
- struct max17040_chip *chip = i2c_get_clientdata(client);
- u16 soc;
+ u32 soc;
- soc = max17040_read_reg(client, MAX17040_SOC);
+ regmap_read(chip->regmap, MAX17040_SOC, &soc);
- chip->soc = (soc >> 8);
+ return soc >> (chip->quirk_double_soc ? 9 : 8);
}
-static void max17040_get_version(struct i2c_client *client)
+static int max17040_get_version(struct max17040_chip *chip)
{
- u16 version;
+ int ret;
+ u32 version;
- version = max17040_read_reg(client, MAX17040_VER);
+ ret = regmap_read(chip->regmap, MAX17040_VER, &version);
- dev_info(&client->dev, "MAX17040 Fuel-Gauge Ver 0x%x\n", version);
+ return ret ? ret : version;
}
-static void max17040_get_online(struct i2c_client *client)
+static int max17040_get_online(struct max17040_chip *chip)
{
- struct max17040_chip *chip = i2c_get_clientdata(client);
-
- if (chip->pdata && chip->pdata->battery_online)
- chip->online = chip->pdata->battery_online();
- else
- chip->online = 1;
+ return chip->pdata && chip->pdata->battery_online ?
+ chip->pdata->battery_online() : 1;
}
-static void max17040_get_status(struct i2c_client *client)
+static int max17040_get_status(struct max17040_chip *chip)
{
- struct max17040_chip *chip = i2c_get_clientdata(client);
-
if (!chip->pdata || !chip->pdata->charger_online
- || !chip->pdata->charger_enable) {
- chip->status = POWER_SUPPLY_STATUS_UNKNOWN;
- return;
- }
+ || !chip->pdata->charger_enable)
+ return POWER_SUPPLY_STATUS_UNKNOWN;
- if (chip->pdata->charger_online()) {
+ if (max17040_get_soc(chip) > MAX17040_BATTERY_FULL)
+ return POWER_SUPPLY_STATUS_FULL;
+
+ if (chip->pdata->charger_online())
if (chip->pdata->charger_enable())
- chip->status = POWER_SUPPLY_STATUS_CHARGING;
+ return POWER_SUPPLY_STATUS_CHARGING;
else
- chip->status = POWER_SUPPLY_STATUS_NOT_CHARGING;
- } else {
- chip->status = POWER_SUPPLY_STATUS_DISCHARGING;
- }
-
- if (chip->soc > MAX17040_BATTERY_FULL)
- chip->status = POWER_SUPPLY_STATUS_FULL;
+ return POWER_SUPPLY_STATUS_NOT_CHARGING;
+ else
+ return POWER_SUPPLY_STATUS_DISCHARGING;
}
static int max17040_get_of_data(struct max17040_chip *chip)
{
struct device *dev = &chip->client->dev;
+ struct chip_data *data = &max17040_family[
+ (uintptr_t) of_device_get_match_data(dev)];
+ int rcomp_len;
+ u8 rcomp[2];
+
+ chip->quirk_double_soc = device_property_read_bool(dev,
+ "maxim,double-soc");
chip->low_soc_alert = MAX17040_ATHD_DEFAULT_POWER_UP;
device_property_read_u32(dev,
"maxim,alert-low-soc-level",
&chip->low_soc_alert);
- if (chip->low_soc_alert <= 0 || chip->low_soc_alert >= 33)
+ if (chip->low_soc_alert <= 0 ||
+ chip->low_soc_alert > (chip->quirk_double_soc ? 16 : 32)) {
+ dev_err(dev, "maxim,alert-low-soc-level out of bounds\n");
return -EINVAL;
+ }
+
+ rcomp_len = device_property_count_u8(dev, "maxim,rcomp");
+ chip->rcomp = MAX17040_RCOMP_DEFAULT;
+ if (rcomp_len == data->rcomp_bytes) {
+ device_property_read_u8_array(dev, "maxim,rcomp",
+ rcomp, rcomp_len);
+ chip->rcomp = rcomp_len == 2 ?
+ rcomp[0] << 8 | rcomp[1] :
+ rcomp[0] << 8;
+ } else if (rcomp_len > 0) {
+ dev_err(dev, "maxim,rcomp has incorrect length\n");
+ return -EINVAL;
+ }
return 0;
}
-static void max17040_check_changes(struct i2c_client *client)
+static void max17040_check_changes(struct max17040_chip *chip)
{
- max17040_get_vcell(client);
- max17040_get_soc(client);
- max17040_get_online(client);
- max17040_get_status(client);
+ chip->soc = max17040_get_soc(chip);
+ chip->status = max17040_get_status(chip);
+}
+
+static void max17040_queue_work(struct max17040_chip *chip)
+{
+ queue_delayed_work(system_power_efficient_wq, &chip->work,
+ MAX17040_DELAY);
+}
+
+static void max17040_stop_work(void *data)
+{
+ struct max17040_chip *chip = data;
+
+ cancel_delayed_work_sync(&chip->work);
}
static void max17040_work(struct work_struct *work)
@@ -217,30 +310,51 @@ static void max17040_work(struct work_struct *work)
/* store SOC and status to check changes */
last_soc = chip->soc;
last_status = chip->status;
- max17040_check_changes(chip->client);
+ max17040_check_changes(chip);
/* check changes and send uevent */
if (last_soc != chip->soc || last_status != chip->status)
power_supply_changed(chip->battery);
- queue_delayed_work(system_power_efficient_wq, &chip->work,
- MAX17040_DELAY);
+ max17040_queue_work(chip);
+}
+
+/* Returns true if alert cause was SOC change, not low SOC */
+static bool max17040_handle_soc_alert(struct max17040_chip *chip)
+{
+ bool ret = true;
+ u32 data;
+
+ regmap_read(chip->regmap, MAX17040_STATUS, &data);
+
+ if (data & MAX17040_STATUS_HD_MASK) {
+ // this alert was caused by low soc
+ ret = false;
+ }
+ if (data & MAX17040_STATUS_SC_MASK) {
+ // soc change bit -- deassert to mark as handled
+ regmap_write(chip->regmap, MAX17040_STATUS,
+ data & ~MAX17040_STATUS_SC_MASK);
+ }
+
+ return ret;
}
static irqreturn_t max17040_thread_handler(int id, void *dev)
{
struct max17040_chip *chip = dev;
- struct i2c_client *client = chip->client;
- dev_warn(&client->dev, "IRQ: Alert battery low level");
+ if (!(chip->data.has_soc_alert && max17040_handle_soc_alert(chip)))
+ dev_warn(&chip->client->dev, "IRQ: Alert battery low level\n");
+
/* read registers */
- max17040_check_changes(chip->client);
+ max17040_check_changes(chip);
/* send uevent */
power_supply_changed(chip->battery);
/* reset alert bit */
- max17040_set_low_soc_alert(client, chip->low_soc_alert);
+ max17040_set_low_soc_alert(chip, chip->low_soc_alert);
return IRQ_HANDLED;
}
@@ -279,12 +393,13 @@ static int max17040_set_property(struct power_supply *psy,
switch (psp) {
case POWER_SUPPLY_PROP_CAPACITY_ALERT_MIN:
- /* alert threshold can be programmed from 1% up to 32% */
- if ((val->intval < 1) || (val->intval > 32)) {
+ /* alert threshold can be programmed from 1% up to 16/32% */
+ if ((val->intval < 1) ||
+ (val->intval > (chip->quirk_double_soc ? 16 : 32))) {
ret = -EINVAL;
break;
}
- ret = max17040_set_low_soc_alert(chip->client, val->intval);
+ ret = max17040_set_low_soc_alert(chip, val->intval);
chip->low_soc_alert = val->intval;
break;
default:
@@ -294,6 +409,41 @@ static int max17040_set_property(struct power_supply *psy,
return ret;
}
+static int max17040_get_property(struct power_supply *psy,
+ enum power_supply_property psp,
+ union power_supply_propval *val)
+{
+ struct max17040_chip *chip = power_supply_get_drvdata(psy);
+
+ switch (psp) {
+ case POWER_SUPPLY_PROP_STATUS:
+ val->intval = max17040_get_status(chip);
+ break;
+ case POWER_SUPPLY_PROP_ONLINE:
+ val->intval = max17040_get_online(chip);
+ break;
+ case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+ val->intval = max17040_get_vcell(chip);
+ break;
+ case POWER_SUPPLY_PROP_CAPACITY:
+ val->intval = max17040_get_soc(chip);
+ break;
+ case POWER_SUPPLY_PROP_CAPACITY_ALERT_MIN:
+ val->intval = chip->low_soc_alert;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static const struct regmap_config max17040_regmap = {
+ .reg_bits = 8,
+ .reg_stride = 2,
+ .val_bits = 16,
+ .val_format_endian = REGMAP_ENDIAN_BIG,
+};
+
static enum power_supply_property max17040_battery_props[] = {
POWER_SUPPLY_PROP_STATUS,
POWER_SUPPLY_PROP_ONLINE,
@@ -318,6 +468,8 @@ static int max17040_probe(struct i2c_client *client,
struct i2c_adapter *adapter = client->adapter;
struct power_supply_config psy_cfg = {};
struct max17040_chip *chip;
+ enum chip_id chip_id;
+ bool enable_irq = false;
int ret;
if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE))
@@ -328,37 +480,68 @@ static int max17040_probe(struct i2c_client *client,
return -ENOMEM;
chip->client = client;
+ chip->regmap = devm_regmap_init_i2c(client, &max17040_regmap);
chip->pdata = client->dev.platform_data;
- ret = max17040_get_of_data(chip);
- if (ret) {
- dev_err(&client->dev,
- "failed: low SOC alert OF data out of bounds\n");
- return ret;
+ chip_id = (enum chip_id) id->driver_data;
+ if (client->dev.of_node) {
+ ret = max17040_get_of_data(chip);
+ if (ret)
+ return ret;
+ chip_id = (enum chip_id) (uintptr_t)
+ of_device_get_match_data(&client->dev);
}
+ chip->data = max17040_family[chip_id];
i2c_set_clientdata(client, chip);
psy_cfg.drv_data = chip;
- chip->battery = power_supply_register(&client->dev,
+ chip->battery = devm_power_supply_register(&client->dev,
&max17040_battery_desc, &psy_cfg);
if (IS_ERR(chip->battery)) {
dev_err(&client->dev, "failed: power supply register\n");
return PTR_ERR(chip->battery);
}
- max17040_reset(client);
- max17040_get_version(client);
+ ret = max17040_get_version(chip);
+ if (ret < 0)
+ return ret;
+ dev_dbg(&chip->client->dev, "MAX17040 Fuel-Gauge Ver 0x%x\n", ret);
+
+ if (chip_id == ID_MAX17040 || chip_id == ID_MAX17041)
+ max17040_reset(chip);
+
+ max17040_set_rcomp(chip, chip->rcomp);
/* check interrupt */
- if (client->irq && of_device_is_compatible(client->dev.of_node,
- "maxim,max77836-battery")) {
- ret = max17040_set_low_soc_alert(client, chip->low_soc_alert);
+ if (client->irq && chip->data.has_low_soc_alert) {
+ ret = max17040_set_low_soc_alert(chip, chip->low_soc_alert);
if (ret) {
dev_err(&client->dev,
"Failed to set low SOC alert: err %d\n", ret);
return ret;
}
+ enable_irq = true;
+ }
+
+ if (client->irq && chip->data.has_soc_alert) {
+ ret = max17040_set_soc_alert(chip, 1);
+ if (ret) {
+ dev_err(&client->dev,
+ "Failed to set SOC alert: err %d\n", ret);
+ return ret;
+ }
+ enable_irq = true;
+ } else {
+ /* soc alerts negate the need for polling */
+ INIT_DEFERRABLE_WORK(&chip->work, max17040_work);
+ ret = devm_add_action(&client->dev, max17040_stop_work, chip);
+ if (ret)
+ return ret;
+ max17040_queue_work(chip);
+ }
+
+ if (enable_irq) {
ret = max17040_enable_alert_irq(chip);
if (ret) {
client->irq = 0;
@@ -367,19 +550,6 @@ static int max17040_probe(struct i2c_client *client,
}
}
- INIT_DEFERRABLE_WORK(&chip->work, max17040_work);
- queue_delayed_work(system_power_efficient_wq, &chip->work,
- MAX17040_DELAY);
-
- return 0;
-}
-
-static int max17040_remove(struct i2c_client *client)
-{
- struct max17040_chip *chip = i2c_get_clientdata(client);
-
- power_supply_unregister(chip->battery);
- cancel_delayed_work(&chip->work);
return 0;
}
@@ -390,7 +560,11 @@ static int max17040_suspend(struct device *dev)
struct i2c_client *client = to_i2c_client(dev);
struct max17040_chip *chip = i2c_get_clientdata(client);
- cancel_delayed_work(&chip->work);
+ if (client->irq && chip->data.has_soc_alert)
+ // disable soc alert to prevent wakeup
+ max17040_set_soc_alert(chip, 0);
+ else
+ cancel_delayed_work(&chip->work);
if (client->irq && device_may_wakeup(dev))
enable_irq_wake(client->irq);
@@ -403,12 +577,14 @@ static int max17040_resume(struct device *dev)
struct i2c_client *client = to_i2c_client(dev);
struct max17040_chip *chip = i2c_get_clientdata(client);
- queue_delayed_work(system_power_efficient_wq, &chip->work,
- MAX17040_DELAY);
-
if (client->irq && device_may_wakeup(dev))
disable_irq_wake(client->irq);
+ if (client->irq && chip->data.has_soc_alert)
+ max17040_set_soc_alert(chip, 1);
+ else
+ max17040_queue_work(chip);
+
return 0;
}
@@ -422,16 +598,30 @@ static SIMPLE_DEV_PM_OPS(max17040_pm_ops, max17040_suspend, max17040_resume);
#endif /* CONFIG_PM_SLEEP */
static const struct i2c_device_id max17040_id[] = {
- { "max17040" },
- { "max77836-battery" },
- { }
+ { "max17040", ID_MAX17040 },
+ { "max17041", ID_MAX17041 },
+ { "max17043", ID_MAX17043 },
+ { "max77836-battery", ID_MAX17043 },
+ { "max17044", ID_MAX17044 },
+ { "max17048", ID_MAX17048 },
+ { "max17049", ID_MAX17049 },
+ { "max17058", ID_MAX17058 },
+ { "max17059", ID_MAX17059 },
+ { /* sentinel */ }
};
MODULE_DEVICE_TABLE(i2c, max17040_id);
static const struct of_device_id max17040_of_match[] = {
- { .compatible = "maxim,max17040" },
- { .compatible = "maxim,max77836-battery" },
- { },
+ { .compatible = "maxim,max17040", .data = (void *) ID_MAX17040 },
+ { .compatible = "maxim,max17041", .data = (void *) ID_MAX17041 },
+ { .compatible = "maxim,max17043", .data = (void *) ID_MAX17043 },
+ { .compatible = "maxim,max77836-battery", .data = (void *) ID_MAX17043 },
+ { .compatible = "maxim,max17044", .data = (void *) ID_MAX17044 },
+ { .compatible = "maxim,max17048", .data = (void *) ID_MAX17048 },
+ { .compatible = "maxim,max17049", .data = (void *) ID_MAX17049 },
+ { .compatible = "maxim,max17058", .data = (void *) ID_MAX17058 },
+ { .compatible = "maxim,max17059", .data = (void *) ID_MAX17059 },
+ { /* sentinel */ },
};
MODULE_DEVICE_TABLE(of, max17040_of_match);
@@ -442,7 +632,6 @@ static struct i2c_driver max17040_i2c_driver = {
.pm = MAX17040_PM_OPS,
},
.probe = max17040_probe,
- .remove = max17040_remove,
.id_table = max17040_id,
};
module_i2c_driver(max17040_i2c_driver);
diff --git a/drivers/power/supply/pm2301_charger.c b/drivers/power/supply/pm2301_charger.c
index 17749fc90e16..2df6a2459d1f 100644
--- a/drivers/power/supply/pm2301_charger.c
+++ b/drivers/power/supply/pm2301_charger.c
@@ -104,11 +104,6 @@ static int pm2xxx_charger_current_map[] = {
3000,
};
-static const struct i2c_device_id pm2xxx_ident[] = {
- { "pm2301", 0 },
- { }
-};
-
static void set_lpn_pin(struct pm2xxx_charger *pm2)
{
if (!pm2->ac.charger_connected && gpio_is_valid(pm2->lpn_pin)) {
@@ -396,7 +391,7 @@ static int pm2_int_reg3(void *pm2_data, int val)
if (val & (PM2XXX_INT4_ITCHARGINGON)) {
dev_dbg(pm2->dev ,
- "chargind operation has started\n");
+ "charging operation has started\n");
}
if (val & (PM2XXX_INT4_ITVRESUME)) {
diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c
index ccbad435ed12..38e3aa642131 100644
--- a/drivers/power/supply/power_supply_core.c
+++ b/drivers/power/supply/power_supply_core.c
@@ -579,6 +579,12 @@ int power_supply_get_battery_info(struct power_supply *psy,
info->charge_term_current_ua = -EINVAL;
info->constant_charge_current_max_ua = -EINVAL;
info->constant_charge_voltage_max_uv = -EINVAL;
+ info->temp_ambient_alert_min = INT_MIN;
+ info->temp_ambient_alert_max = INT_MAX;
+ info->temp_alert_min = INT_MIN;
+ info->temp_alert_max = INT_MAX;
+ info->temp_min = INT_MIN;
+ info->temp_max = INT_MAX;
info->factory_internal_resistance_uohm = -EINVAL;
info->resist_table = NULL;
@@ -639,6 +645,19 @@ int power_supply_get_battery_info(struct power_supply *psy,
of_property_read_u32(battery_np, "factory-internal-resistance-micro-ohms",
&info->factory_internal_resistance_uohm);
+ of_property_read_u32_index(battery_np, "ambient-celsius",
+ 0, &info->temp_ambient_alert_min);
+ of_property_read_u32_index(battery_np, "ambient-celsius",
+ 1, &info->temp_ambient_alert_max);
+ of_property_read_u32_index(battery_np, "alert-celsius",
+ 0, &info->temp_alert_min);
+ of_property_read_u32_index(battery_np, "alert-celsius",
+ 1, &info->temp_alert_max);
+ of_property_read_u32_index(battery_np, "operating-range-celsius",
+ 0, &info->temp_min);
+ of_property_read_u32_index(battery_np, "operating-range-celsius",
+ 1, &info->temp_max);
+
len = of_property_count_u32_elems(battery_np, "ocv-capacity-celsius");
if (len < 0 && len != -EINVAL) {
err = len;
diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c
index 3d383086018c..a616b9d8f43c 100644
--- a/drivers/power/supply/power_supply_sysfs.c
+++ b/drivers/power/supply/power_supply_sysfs.c
@@ -56,6 +56,7 @@ static const char * const POWER_SUPPLY_TYPE_TEXT[] = {
[POWER_SUPPLY_TYPE_USB_PD] = "USB_PD",
[POWER_SUPPLY_TYPE_USB_PD_DRP] = "USB_PD_DRP",
[POWER_SUPPLY_TYPE_APPLE_BRICK_ID] = "BrickID",
+ [POWER_SUPPLY_TYPE_WIRELESS] = "Wireless",
};
static const char * const POWER_SUPPLY_USB_TYPE_TEXT[] = {
diff --git a/drivers/power/supply/rn5t618_power.c b/drivers/power/supply/rn5t618_power.c
new file mode 100644
index 000000000000..dee520f0fdf5
--- /dev/null
+++ b/drivers/power/supply/rn5t618_power.c
@@ -0,0 +1,556 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Power supply driver for the RICOH RN5T618 power management chip family
+ *
+ * Copyright (C) 2020 Andreas Kemnade
+ */
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/mfd/rn5t618.h>
+#include <linux/platform_device.h>
+#include <linux/power_supply.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+#define CHG_STATE_ADP_INPUT 0x40
+#define CHG_STATE_USB_INPUT 0x80
+#define CHG_STATE_MASK 0x1f
+#define CHG_STATE_CHG_OFF 0
+#define CHG_STATE_CHG_READY_VADP 1
+#define CHG_STATE_CHG_TRICKLE 2
+#define CHG_STATE_CHG_RAPID 3
+#define CHG_STATE_CHG_COMPLETE 4
+#define CHG_STATE_SUSPEND 5
+#define CHG_STATE_VCHG_OVER_VOL 6
+#define CHG_STATE_BAT_ERROR 7
+#define CHG_STATE_NO_BAT 8
+#define CHG_STATE_BAT_OVER_VOL 9
+#define CHG_STATE_BAT_TEMP_ERR 10
+#define CHG_STATE_DIE_ERR 11
+#define CHG_STATE_DIE_SHUTDOWN 12
+#define CHG_STATE_NO_BAT2 13
+#define CHG_STATE_CHG_READY_VUSB 14
+
+#define FG_ENABLE 1
+
+struct rn5t618_power_info {
+ struct rn5t618 *rn5t618;
+ struct platform_device *pdev;
+ struct power_supply *battery;
+ struct power_supply *usb;
+ struct power_supply *adp;
+ int irq;
+};
+
+static enum power_supply_property rn5t618_usb_props[] = {
+ POWER_SUPPLY_PROP_STATUS,
+ POWER_SUPPLY_PROP_ONLINE,
+};
+
+static enum power_supply_property rn5t618_adp_props[] = {
+ POWER_SUPPLY_PROP_STATUS,
+ POWER_SUPPLY_PROP_ONLINE,
+};
+
+
+static enum power_supply_property rn5t618_battery_props[] = {
+ POWER_SUPPLY_PROP_STATUS,
+ POWER_SUPPLY_PROP_PRESENT,
+ POWER_SUPPLY_PROP_VOLTAGE_NOW,
+ POWER_SUPPLY_PROP_CURRENT_NOW,
+ POWER_SUPPLY_PROP_CAPACITY,
+ POWER_SUPPLY_PROP_TEMP,
+ POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW,
+ POWER_SUPPLY_PROP_TIME_TO_FULL_NOW,
+ POWER_SUPPLY_PROP_TECHNOLOGY,
+ POWER_SUPPLY_PROP_CHARGE_FULL,
+ POWER_SUPPLY_PROP_CHARGE_NOW,
+};
+
+static int rn5t618_battery_read_doublereg(struct rn5t618_power_info *info,
+ u8 reg, u16 *result)
+{
+ int ret, i;
+ u8 data[2];
+ u16 old, new;
+
+ old = 0;
+ /* Prevent races when registers are changing. */
+ for (i = 0; i < 3; i++) {
+ ret = regmap_bulk_read(info->rn5t618->regmap,
+ reg, data, sizeof(data));
+ if (ret)
+ return ret;
+
+ new = data[0] << 8;
+ new |= data[1];
+ if (new == old)
+ break;
+
+ old = new;
+ }
+
+ *result = new;
+
+ return 0;
+}
+
+static int rn5t618_decode_status(unsigned int status)
+{
+ switch (status & CHG_STATE_MASK) {
+ case CHG_STATE_CHG_OFF:
+ case CHG_STATE_SUSPEND:
+ case CHG_STATE_VCHG_OVER_VOL:
+ case CHG_STATE_DIE_SHUTDOWN:
+ return POWER_SUPPLY_STATUS_DISCHARGING;
+
+ case CHG_STATE_CHG_TRICKLE:
+ case CHG_STATE_CHG_RAPID:
+ return POWER_SUPPLY_STATUS_CHARGING;
+
+ case CHG_STATE_CHG_COMPLETE:
+ return POWER_SUPPLY_STATUS_FULL;
+
+ default:
+ return POWER_SUPPLY_STATUS_NOT_CHARGING;
+ }
+}
+
+static int rn5t618_battery_status(struct rn5t618_power_info *info,
+ union power_supply_propval *val)
+{
+ unsigned int v;
+ int ret;
+
+ ret = regmap_read(info->rn5t618->regmap, RN5T618_CHGSTATE, &v);
+ if (ret)
+ return ret;
+
+ val->intval = POWER_SUPPLY_STATUS_UNKNOWN;
+
+ if (v & 0xc0) { /* USB or ADP plugged */
+ val->intval = rn5t618_decode_status(v);
+ } else
+ val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+
+ return ret;
+}
+
+static int rn5t618_battery_present(struct rn5t618_power_info *info,
+ union power_supply_propval *val)
+{
+ unsigned int v;
+ int ret;
+
+ ret = regmap_read(info->rn5t618->regmap, RN5T618_CHGSTATE, &v);
+ if (ret)
+ return ret;
+
+ v &= CHG_STATE_MASK;
+ if ((v == CHG_STATE_NO_BAT) || (v == CHG_STATE_NO_BAT2))
+ val->intval = 0;
+ else
+ val->intval = 1;
+
+ return ret;
+}
+
+static int rn5t618_battery_voltage_now(struct rn5t618_power_info *info,
+ union power_supply_propval *val)
+{
+ u16 res;
+ int ret;
+
+ ret = rn5t618_battery_read_doublereg(info, RN5T618_VOLTAGE_1, &res);
+ if (ret)
+ return ret;
+
+ val->intval = res * 2 * 2500 / 4095 * 1000;
+
+ return 0;
+}
+
+static int rn5t618_battery_current_now(struct rn5t618_power_info *info,
+ union power_supply_propval *val)
+{
+ u16 res;
+ int ret;
+
+ ret = rn5t618_battery_read_doublereg(info, RN5T618_CC_AVEREG1, &res);
+ if (ret)
+ return ret;
+
+ /* current is negative when discharging */
+ val->intval = sign_extend32(res, 13) * 1000;
+
+ return 0;
+}
+
+static int rn5t618_battery_capacity(struct rn5t618_power_info *info,
+ union power_supply_propval *val)
+{
+ unsigned int v;
+ int ret;
+
+ ret = regmap_read(info->rn5t618->regmap, RN5T618_SOC, &v);
+ if (ret)
+ return ret;
+
+ val->intval = v;
+
+ return 0;
+}
+
+static int rn5t618_battery_temp(struct rn5t618_power_info *info,
+ union power_supply_propval *val)
+{
+ u16 res;
+ int ret;
+
+ ret = rn5t618_battery_read_doublereg(info, RN5T618_TEMP_1, &res);
+ if (ret)
+ return ret;
+
+ val->intval = sign_extend32(res, 11) * 10 / 16;
+
+ return 0;
+}
+
+static int rn5t618_battery_tte(struct rn5t618_power_info *info,
+ union power_supply_propval *val)
+{
+ u16 res;
+ int ret;
+
+ ret = rn5t618_battery_read_doublereg(info, RN5T618_TT_EMPTY_H, &res);
+ if (ret)
+ return ret;
+
+ if (res == 65535)
+ return -ENODATA;
+
+ val->intval = res * 60;
+
+ return 0;
+}
+
+static int rn5t618_battery_ttf(struct rn5t618_power_info *info,
+ union power_supply_propval *val)
+{
+ u16 res;
+ int ret;
+
+ ret = rn5t618_battery_read_doublereg(info, RN5T618_TT_FULL_H, &res);
+ if (ret)
+ return ret;
+
+ if (res == 65535)
+ return -ENODATA;
+
+ val->intval = res * 60;
+
+ return 0;
+}
+
+static int rn5t618_battery_charge_full(struct rn5t618_power_info *info,
+ union power_supply_propval *val)
+{
+ u16 res;
+ int ret;
+
+ ret = rn5t618_battery_read_doublereg(info, RN5T618_FA_CAP_H, &res);
+ if (ret)
+ return ret;
+
+ val->intval = res * 1000;
+
+ return 0;
+}
+
+static int rn5t618_battery_charge_now(struct rn5t618_power_info *info,
+ union power_supply_propval *val)
+{
+ u16 res;
+ int ret;
+
+ ret = rn5t618_battery_read_doublereg(info, RN5T618_RE_CAP_H, &res);
+ if (ret)
+ return ret;
+
+ val->intval = res * 1000;
+
+ return 0;
+}
+
+static int rn5t618_battery_get_property(struct power_supply *psy,
+ enum power_supply_property psp,
+ union power_supply_propval *val)
+{
+ int ret = 0;
+ struct rn5t618_power_info *info = power_supply_get_drvdata(psy);
+
+ switch (psp) {
+ case POWER_SUPPLY_PROP_STATUS:
+ ret = rn5t618_battery_status(info, val);
+ break;
+ case POWER_SUPPLY_PROP_PRESENT:
+ ret = rn5t618_battery_present(info, val);
+ break;
+ case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+ ret = rn5t618_battery_voltage_now(info, val);
+ break;
+ case POWER_SUPPLY_PROP_CURRENT_NOW:
+ ret = rn5t618_battery_current_now(info, val);
+ break;
+ case POWER_SUPPLY_PROP_CAPACITY:
+ ret = rn5t618_battery_capacity(info, val);
+ break;
+ case POWER_SUPPLY_PROP_TEMP:
+ ret = rn5t618_battery_temp(info, val);
+ break;
+ case POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW:
+ ret = rn5t618_battery_tte(info, val);
+ break;
+ case POWER_SUPPLY_PROP_TIME_TO_FULL_NOW:
+ ret = rn5t618_battery_ttf(info, val);
+ break;
+ case POWER_SUPPLY_PROP_TECHNOLOGY:
+ val->intval = POWER_SUPPLY_TECHNOLOGY_LION;
+ break;
+ case POWER_SUPPLY_PROP_CHARGE_FULL:
+ ret = rn5t618_battery_charge_full(info, val);
+ break;
+ case POWER_SUPPLY_PROP_CHARGE_NOW:
+ ret = rn5t618_battery_charge_now(info, val);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+static int rn5t618_adp_get_property(struct power_supply *psy,
+ enum power_supply_property psp,
+ union power_supply_propval *val)
+{
+ struct rn5t618_power_info *info = power_supply_get_drvdata(psy);
+ unsigned int chgstate;
+ bool online;
+ int ret;
+
+ ret = regmap_read(info->rn5t618->regmap, RN5T618_CHGSTATE, &chgstate);
+ if (ret)
+ return ret;
+
+ online = !!(chgstate & CHG_STATE_ADP_INPUT);
+
+ switch (psp) {
+ case POWER_SUPPLY_PROP_ONLINE:
+ val->intval = online;
+ break;
+ case POWER_SUPPLY_PROP_STATUS:
+ if (!online) {
+ val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
+ break;
+ }
+ val->intval = rn5t618_decode_status(chgstate);
+ if (val->intval != POWER_SUPPLY_STATUS_CHARGING)
+ val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
+
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int rn5t618_usb_get_property(struct power_supply *psy,
+ enum power_supply_property psp,
+ union power_supply_propval *val)
+{
+ struct rn5t618_power_info *info = power_supply_get_drvdata(psy);
+ unsigned int chgstate;
+ bool online;
+ int ret;
+
+ ret = regmap_read(info->rn5t618->regmap, RN5T618_CHGSTATE, &chgstate);
+ if (ret)
+ return ret;
+
+ online = !!(chgstate & CHG_STATE_USB_INPUT);
+
+ switch (psp) {
+ case POWER_SUPPLY_PROP_ONLINE:
+ val->intval = online;
+ break;
+ case POWER_SUPPLY_PROP_STATUS:
+ if (!online) {
+ val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
+ break;
+ }
+ val->intval = rn5t618_decode_status(chgstate);
+ if (val->intval != POWER_SUPPLY_STATUS_CHARGING)
+ val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
+
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct power_supply_desc rn5t618_battery_desc = {
+ .name = "rn5t618-battery",
+ .type = POWER_SUPPLY_TYPE_BATTERY,
+ .properties = rn5t618_battery_props,
+ .num_properties = ARRAY_SIZE(rn5t618_battery_props),
+ .get_property = rn5t618_battery_get_property,
+};
+
+static const struct power_supply_desc rn5t618_adp_desc = {
+ .name = "rn5t618-adp",
+ .type = POWER_SUPPLY_TYPE_MAINS,
+ .properties = rn5t618_adp_props,
+ .num_properties = ARRAY_SIZE(rn5t618_adp_props),
+ .get_property = rn5t618_adp_get_property,
+};
+
+static const struct power_supply_desc rn5t618_usb_desc = {
+ .name = "rn5t618-usb",
+ .type = POWER_SUPPLY_TYPE_USB,
+ .properties = rn5t618_usb_props,
+ .num_properties = ARRAY_SIZE(rn5t618_usb_props),
+ .get_property = rn5t618_usb_get_property,
+};
+
+static irqreturn_t rn5t618_charger_irq(int irq, void *data)
+{
+ struct device *dev = data;
+ struct rn5t618_power_info *info = dev_get_drvdata(dev);
+
+ unsigned int ctrl, stat1, stat2, err;
+
+ regmap_read(info->rn5t618->regmap, RN5T618_CHGERR_IRR, &err);
+ regmap_read(info->rn5t618->regmap, RN5T618_CHGCTRL_IRR, &ctrl);
+ regmap_read(info->rn5t618->regmap, RN5T618_CHGSTAT_IRR1, &stat1);
+ regmap_read(info->rn5t618->regmap, RN5T618_CHGSTAT_IRR2, &stat2);
+
+ regmap_write(info->rn5t618->regmap, RN5T618_CHGERR_IRR, 0);
+ regmap_write(info->rn5t618->regmap, RN5T618_CHGCTRL_IRR, 0);
+ regmap_write(info->rn5t618->regmap, RN5T618_CHGSTAT_IRR1, 0);
+ regmap_write(info->rn5t618->regmap, RN5T618_CHGSTAT_IRR2, 0);
+
+ dev_dbg(dev, "chgerr: %x chgctrl: %x chgstat: %x chgstat2: %x\n",
+ err, ctrl, stat1, stat2);
+
+ power_supply_changed(info->usb);
+ power_supply_changed(info->adp);
+ power_supply_changed(info->battery);
+
+ return IRQ_HANDLED;
+}
+
+static int rn5t618_power_probe(struct platform_device *pdev)
+{
+ int ret = 0;
+ unsigned int v;
+ struct power_supply_config psy_cfg = {};
+ struct rn5t618_power_info *info;
+
+ info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ info->pdev = pdev;
+ info->rn5t618 = dev_get_drvdata(pdev->dev.parent);
+ info->irq = -1;
+
+ platform_set_drvdata(pdev, info);
+
+ ret = regmap_read(info->rn5t618->regmap, RN5T618_CONTROL, &v);
+ if (ret)
+ return ret;
+
+ if (!(v & FG_ENABLE)) {
+ /* E.g. the vendor kernels of various Kobo and Tolino Ebook
+ * readers disable the fuel gauge on shutdown. If a kernel
+ * without fuel gauge support is booted after that, the fuel
+ * gauge will get decalibrated.
+ */
+ dev_info(&pdev->dev, "Fuel gauge not enabled, enabling now\n");
+ dev_info(&pdev->dev, "Expect imprecise results\n");
+ regmap_update_bits(info->rn5t618->regmap, RN5T618_CONTROL,
+ FG_ENABLE, FG_ENABLE);
+ }
+
+ psy_cfg.drv_data = info;
+ info->battery = devm_power_supply_register(&pdev->dev,
+ &rn5t618_battery_desc,
+ &psy_cfg);
+ if (IS_ERR(info->battery)) {
+ ret = PTR_ERR(info->battery);
+ dev_err(&pdev->dev, "failed to register battery: %d\n", ret);
+ return ret;
+ }
+
+ info->adp = devm_power_supply_register(&pdev->dev,
+ &rn5t618_adp_desc,
+ &psy_cfg);
+ if (IS_ERR(info->adp)) {
+ ret = PTR_ERR(info->adp);
+ dev_err(&pdev->dev, "failed to register adp: %d\n", ret);
+ return ret;
+ }
+
+ info->usb = devm_power_supply_register(&pdev->dev,
+ &rn5t618_usb_desc,
+ &psy_cfg);
+ if (IS_ERR(info->usb)) {
+ ret = PTR_ERR(info->usb);
+ dev_err(&pdev->dev, "failed to register usb: %d\n", ret);
+ return ret;
+ }
+
+ if (info->rn5t618->irq_data)
+ info->irq = regmap_irq_get_virq(info->rn5t618->irq_data,
+ RN5T618_IRQ_CHG);
+
+ if (info->irq < 0)
+ info->irq = -1;
+ else {
+ ret = devm_request_threaded_irq(&pdev->dev, info->irq, NULL,
+ rn5t618_charger_irq,
+ IRQF_ONESHOT,
+ "rn5t618_power",
+ &pdev->dev);
+
+ if (ret < 0) {
+ dev_err(&pdev->dev, "request IRQ:%d fail\n",
+ info->irq);
+ info->irq = -1;
+ }
+ }
+
+ return 0;
+}
+
+static struct platform_driver rn5t618_power_driver = {
+ .driver = {
+ .name = "rn5t618-power",
+ },
+ .probe = rn5t618_power_probe,
+};
+
+module_platform_driver(rn5t618_power_driver);
+MODULE_ALIAS("platform:rn5t618-power");
+MODULE_DESCRIPTION("Power supply driver for RICOH RN5T618");
+MODULE_LICENSE("GPL");
diff --git a/drivers/power/supply/rt9455_charger.c b/drivers/power/supply/rt9455_charger.c
index 29161ae90245..594bb3b8a4d1 100644
--- a/drivers/power/supply/rt9455_charger.c
+++ b/drivers/power/supply/rt9455_charger.c
@@ -1731,11 +1731,13 @@ static const struct of_device_id rt9455_of_match[] = {
};
MODULE_DEVICE_TABLE(of, rt9455_of_match);
+#ifdef CONFIG_ACPI
static const struct acpi_device_id rt9455_i2c_acpi_match[] = {
{ "RT945500", 0 },
{ }
};
MODULE_DEVICE_TABLE(acpi, rt9455_i2c_acpi_match);
+#endif
static struct i2c_driver rt9455_driver = {
.probe = rt9455_probe,
diff --git a/drivers/power/supply/sbs-battery.c b/drivers/power/supply/sbs-battery.c
index 7439753fac87..b6a538ebb378 100644
--- a/drivers/power/supply/sbs-battery.c
+++ b/drivers/power/supply/sbs-battery.c
@@ -193,7 +193,6 @@ struct sbs_info {
struct power_supply *power_supply;
bool is_present;
struct gpio_desc *gpio_detect;
- bool enable_detection;
bool charger_broadcasts;
int last_state;
int poll_time;
@@ -480,37 +479,6 @@ static bool sbs_bat_needs_calibration(struct i2c_client *client)
return !!(ret & BIT(7));
}
-static int sbs_get_battery_presence_and_health(
- struct i2c_client *client, enum power_supply_property psp,
- union power_supply_propval *val)
-{
- int ret;
-
- /* Dummy command; if it succeeds, battery is present. */
- ret = sbs_read_word_data(client, sbs_data[REG_STATUS].addr);
-
- if (ret < 0) { /* battery not present*/
- if (psp == POWER_SUPPLY_PROP_PRESENT) {
- val->intval = 0;
- return 0;
- }
- return ret;
- }
-
- if (psp == POWER_SUPPLY_PROP_PRESENT)
- val->intval = 1; /* battery present */
- else { /* POWER_SUPPLY_PROP_HEALTH */
- if (sbs_bat_needs_calibration(client)) {
- val->intval = POWER_SUPPLY_HEALTH_CALIBRATION_REQUIRED;
- } else {
- /* SBS spec doesn't have a general health command. */
- val->intval = POWER_SUPPLY_HEALTH_UNKNOWN;
- }
- }
-
- return 0;
-}
-
static int sbs_get_ti_battery_presence_and_health(
struct i2c_client *client, enum power_supply_property psp,
union power_supply_propval *val)
@@ -569,6 +537,41 @@ static int sbs_get_ti_battery_presence_and_health(
return 0;
}
+static int sbs_get_battery_presence_and_health(
+ struct i2c_client *client, enum power_supply_property psp,
+ union power_supply_propval *val)
+{
+ struct sbs_info *chip = i2c_get_clientdata(client);
+ int ret;
+
+ if (chip->flags & SBS_FLAGS_TI_BQ20ZX5)
+ return sbs_get_ti_battery_presence_and_health(client, psp, val);
+
+ /* Dummy command; if it succeeds, battery is present. */
+ ret = sbs_read_word_data(client, sbs_data[REG_STATUS].addr);
+
+ if (ret < 0) { /* battery not present*/
+ if (psp == POWER_SUPPLY_PROP_PRESENT) {
+ val->intval = 0;
+ return 0;
+ }
+ return ret;
+ }
+
+ if (psp == POWER_SUPPLY_PROP_PRESENT)
+ val->intval = 1; /* battery present */
+ else { /* POWER_SUPPLY_PROP_HEALTH */
+ if (sbs_bat_needs_calibration(client)) {
+ val->intval = POWER_SUPPLY_HEALTH_CALIBRATION_REQUIRED;
+ } else {
+ /* SBS spec doesn't have a general health command. */
+ val->intval = POWER_SUPPLY_HEALTH_UNKNOWN;
+ }
+ }
+
+ return 0;
+}
+
static int sbs_get_battery_property(struct i2c_client *client,
int reg_offset, enum power_supply_property psp,
union power_supply_propval *val)
@@ -871,12 +874,7 @@ static int sbs_get_property(struct power_supply *psy,
switch (psp) {
case POWER_SUPPLY_PROP_PRESENT:
case POWER_SUPPLY_PROP_HEALTH:
- if (chip->flags & SBS_FLAGS_TI_BQ20ZX5)
- ret = sbs_get_ti_battery_presence_and_health(client,
- psp, val);
- else
- ret = sbs_get_battery_presence_and_health(client, psp,
- val);
+ ret = sbs_get_battery_presence_and_health(client, psp, val);
/* this can only be true if no gpio is used */
if (psp == POWER_SUPPLY_PROP_PRESENT)
@@ -967,32 +965,30 @@ static int sbs_get_property(struct power_supply *psy,
return -EINVAL;
}
- if (!chip->enable_detection)
- goto done;
+ if (!chip->gpio_detect && chip->is_present != (ret >= 0)) {
+ bool old_present = chip->is_present;
+ union power_supply_propval val;
+ int err = sbs_get_battery_presence_and_health(
+ client, POWER_SUPPLY_PROP_PRESENT, &val);
- if (!chip->gpio_detect &&
- chip->is_present != (ret >= 0)) {
- sbs_update_presence(chip, (ret >= 0));
- power_supply_changed(chip->power_supply);
+ sbs_update_presence(chip, !err && val.intval);
+
+ if (old_present != chip->is_present)
+ power_supply_changed(chip->power_supply);
}
done:
if (!ret) {
/* Convert units to match requirements for power supply class */
sbs_unit_adjustment(client, psp, val);
+ dev_dbg(&client->dev,
+ "%s: property = %d, value = %x\n", __func__,
+ psp, val->intval);
+ } else if (!chip->is_present) {
+ /* battery not present, so return NODATA for properties */
+ ret = -ENODATA;
}
-
- dev_dbg(&client->dev,
- "%s: property = %d, value = %x\n", __func__, psp, val->intval);
-
- if (ret && chip->is_present)
- return ret;
-
- /* battery not present, so return NODATA for properties */
- if (ret)
- return -ENODATA;
-
- return 0;
+ return ret;
}
static void sbs_supply_changed(struct sbs_info *chip)
@@ -1098,7 +1094,6 @@ static int sbs_probe(struct i2c_client *client)
chip->flags = (u32)(uintptr_t)device_get_match_data(&client->dev);
chip->client = client;
- chip->enable_detection = false;
psy_cfg.of_node = client->dev.of_node;
psy_cfg.drv_data = chip;
chip->last_state = POWER_SUPPLY_STATUS_UNKNOWN;
@@ -1159,15 +1154,19 @@ skip_gpio:
* to the battery.
*/
if (!(force_load || chip->gpio_detect)) {
- rc = sbs_read_word_data(client, sbs_data[REG_STATUS].addr);
+ union power_supply_propval val;
- if (rc < 0) {
- dev_err(&client->dev, "%s: Failed to get device status\n",
- __func__);
+ rc = sbs_get_battery_presence_and_health(
+ client, POWER_SUPPLY_PROP_PRESENT, &val);
+ if (rc < 0 || !val.intval) {
+ dev_err(&client->dev, "Failed to get present status\n");
+ rc = -ENODEV;
goto exit_psupply;
}
}
+ INIT_DELAYED_WORK(&chip->work, sbs_delayed_work);
+
chip->power_supply = devm_power_supply_register(&client->dev, sbs_desc,
&psy_cfg);
if (IS_ERR(chip->power_supply)) {
@@ -1180,10 +1179,6 @@ skip_gpio:
dev_info(&client->dev,
"%s: battery gas gauge device registered\n", client->name);
- INIT_DELAYED_WORK(&chip->work, sbs_delayed_work);
-
- chip->enable_detection = true;
-
return 0;
exit_psupply:
diff --git a/drivers/power/supply/smb347-charger.c b/drivers/power/supply/smb347-charger.c
index f99026d81f2a..d3bf35ed12ce 100644
--- a/drivers/power/supply/smb347-charger.c
+++ b/drivers/power/supply/smb347-charger.c
@@ -16,11 +16,18 @@
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/i2c.h>
-#include <linux/mutex.h>
#include <linux/power_supply.h>
-#include <linux/power/smb347-charger.h>
+#include <linux/property.h>
#include <linux/regmap.h>
+#include <dt-bindings/power/summit,smb347-charger.h>
+
+/* Use the default compensation method */
+#define SMB3XX_SOFT_TEMP_COMPENSATE_DEFAULT -1
+
+/* Use default factory programmed value for hard/soft temperature limit */
+#define SMB3XX_TEMP_USE_DEFAULT -273
+
/*
* Configuration registers. These are mirrored to volatile RAM and can be
* written once %CMD_A_ALLOW_WRITE is set in %CMD_A register. They will be
@@ -122,82 +129,140 @@
/**
* struct smb347_charger - smb347 charger instance
- * @lock: protects concurrent access to online variables
* @dev: pointer to device
* @regmap: pointer to driver regmap
* @mains: power_supply instance for AC/DC power
* @usb: power_supply instance for USB power
- * @battery: power_supply instance for battery
+ * @id: SMB charger ID
* @mains_online: is AC/DC input connected
* @usb_online: is USB input connected
* @charging_enabled: is charging enabled
- * @pdata: pointer to platform data
+ * @max_charge_current: maximum current (in uA) the battery can be charged
+ * @max_charge_voltage: maximum voltage (in uV) the battery can be charged
+ * @pre_charge_current: current (in uA) to use in pre-charging phase
+ * @termination_current: current (in uA) used to determine when the
+ * charging cycle terminates
+ * @pre_to_fast_voltage: voltage (in uV) treshold used for transitioning to
+ * pre-charge to fast charge mode
+ * @mains_current_limit: maximum input current drawn from AC/DC input (in uA)
+ * @usb_hc_current_limit: maximum input high current (in uA) drawn from USB
+ * input
+ * @chip_temp_threshold: die temperature where device starts limiting charge
+ * current [%100 - %130] (in degree C)
+ * @soft_cold_temp_limit: soft cold temperature limit [%0 - %15] (in degree C),
+ * granularity is 5 deg C.
+ * @soft_hot_temp_limit: soft hot temperature limit [%40 - %55] (in degree C),
+ * granularity is 5 deg C.
+ * @hard_cold_temp_limit: hard cold temperature limit [%-5 - %10] (in degree C),
+ * granularity is 5 deg C.
+ * @hard_hot_temp_limit: hard hot temperature limit [%50 - %65] (in degree C),
+ * granularity is 5 deg C.
+ * @suspend_on_hard_temp_limit: suspend charging when hard limit is hit
+ * @soft_temp_limit_compensation: compensation method when soft temperature
+ * limit is hit
+ * @charge_current_compensation: current (in uA) for charging compensation
+ * current when temperature hits soft limits
+ * @use_mains: AC/DC input can be used
+ * @use_usb: USB input can be used
+ * @use_usb_otg: USB OTG output can be used (not implemented yet)
+ * @enable_control: how charging enable/disable is controlled
+ * (driver/pin controls)
+ *
+ * @use_main, @use_usb, and @use_usb_otg are means to enable/disable
+ * hardware support for these. This is useful when we want to have for
+ * example OTG charging controlled via OTG transceiver driver and not by
+ * the SMB347 hardware.
+ *
+ * Hard and soft temperature limit values are given as described in the
+ * device data sheet and assuming NTC beta value is %3750. Even if this is
+ * not the case, these values should be used. They can be mapped to the
+ * corresponding NTC beta values with the help of table %2 in the data
+ * sheet. So for example if NTC beta is %3375 and we want to program hard
+ * hot limit to be %53 deg C, @hard_hot_temp_limit should be set to %50.
+ *
+ * If zero value is given in any of the current and voltage values, the
+ * factory programmed default will be used. For soft/hard temperature
+ * values, pass in %SMB3XX_TEMP_USE_DEFAULT instead.
*/
struct smb347_charger {
- struct mutex lock;
struct device *dev;
struct regmap *regmap;
struct power_supply *mains;
struct power_supply *usb;
- struct power_supply *battery;
+ unsigned int id;
bool mains_online;
bool usb_online;
bool charging_enabled;
- const struct smb347_charger_platform_data *pdata;
+
+ unsigned int max_charge_current;
+ unsigned int max_charge_voltage;
+ unsigned int pre_charge_current;
+ unsigned int termination_current;
+ unsigned int pre_to_fast_voltage;
+ unsigned int mains_current_limit;
+ unsigned int usb_hc_current_limit;
+ unsigned int chip_temp_threshold;
+ int soft_cold_temp_limit;
+ int soft_hot_temp_limit;
+ int hard_cold_temp_limit;
+ int hard_hot_temp_limit;
+ bool suspend_on_hard_temp_limit;
+ unsigned int soft_temp_limit_compensation;
+ unsigned int charge_current_compensation;
+ bool use_mains;
+ bool use_usb;
+ bool use_usb_otg;
+ unsigned int enable_control;
};
-/* Fast charge current in uA */
-static const unsigned int fcc_tbl[] = {
- 700000,
- 900000,
- 1200000,
- 1500000,
- 1800000,
- 2000000,
- 2200000,
- 2500000,
+enum smb_charger_chipid {
+ SMB345,
+ SMB347,
+ SMB358,
+ NUM_CHIP_TYPES,
};
+/* Fast charge current in uA */
+static const unsigned int fcc_tbl[NUM_CHIP_TYPES][8] = {
+ [SMB345] = { 200000, 450000, 600000, 900000,
+ 1300000, 1500000, 1800000, 2000000 },
+ [SMB347] = { 700000, 900000, 1200000, 1500000,
+ 1800000, 2000000, 2200000, 2500000 },
+ [SMB358] = { 200000, 450000, 600000, 900000,
+ 1300000, 1500000, 1800000, 2000000 },
+};
/* Pre-charge current in uA */
-static const unsigned int pcc_tbl[] = {
- 100000,
- 150000,
- 200000,
- 250000,
+static const unsigned int pcc_tbl[NUM_CHIP_TYPES][4] = {
+ [SMB345] = { 150000, 250000, 350000, 450000 },
+ [SMB347] = { 100000, 150000, 200000, 250000 },
+ [SMB358] = { 150000, 250000, 350000, 450000 },
};
/* Termination current in uA */
-static const unsigned int tc_tbl[] = {
- 37500,
- 50000,
- 100000,
- 150000,
- 200000,
- 250000,
- 500000,
- 600000,
+static const unsigned int tc_tbl[NUM_CHIP_TYPES][8] = {
+ [SMB345] = { 30000, 40000, 60000, 80000,
+ 100000, 125000, 150000, 200000 },
+ [SMB347] = { 37500, 50000, 100000, 150000,
+ 200000, 250000, 500000, 600000 },
+ [SMB358] = { 30000, 40000, 60000, 80000,
+ 100000, 125000, 150000, 200000 },
};
/* Input current limit in uA */
-static const unsigned int icl_tbl[] = {
- 300000,
- 500000,
- 700000,
- 900000,
- 1200000,
- 1500000,
- 1800000,
- 2000000,
- 2200000,
- 2500000,
+static const unsigned int icl_tbl[NUM_CHIP_TYPES][10] = {
+ [SMB345] = { 300000, 500000, 700000, 1000000, 1500000,
+ 1800000, 2000000, 2000000, 2000000, 2000000 },
+ [SMB347] = { 300000, 500000, 700000, 900000, 1200000,
+ 1500000, 1800000, 2000000, 2200000, 2500000 },
+ [SMB358] = { 300000, 500000, 700000, 1000000, 1500000,
+ 1800000, 2000000, 2000000, 2000000, 2000000 },
};
/* Charge current compensation in uA */
-static const unsigned int ccc_tbl[] = {
- 250000,
- 700000,
- 900000,
- 1200000,
+static const unsigned int ccc_tbl[NUM_CHIP_TYPES][4] = {
+ [SMB345] = { 200000, 450000, 600000, 900000 },
+ [SMB347] = { 250000, 700000, 900000, 1200000 },
+ [SMB358] = { 200000, 450000, 600000, 900000 },
};
/* Convert register value to current using lookup table */
@@ -242,16 +307,14 @@ static int smb347_update_ps_status(struct smb347_charger *smb)
* Dc and usb are set depending on whether they are enabled in
* platform data _and_ whether corresponding undervoltage is set.
*/
- if (smb->pdata->use_mains)
+ if (smb->use_mains)
dc = !(val & IRQSTAT_E_DCIN_UV_STAT);
- if (smb->pdata->use_usb)
+ if (smb->use_usb)
usb = !(val & IRQSTAT_E_USBIN_UV_STAT);
- mutex_lock(&smb->lock);
ret = smb->mains_online != dc || smb->usb_online != usb;
smb->mains_online = dc;
smb->usb_online = usb;
- mutex_unlock(&smb->lock);
return ret;
}
@@ -267,13 +330,7 @@ static int smb347_update_ps_status(struct smb347_charger *smb)
*/
static bool smb347_is_ps_online(struct smb347_charger *smb)
{
- bool ret;
-
- mutex_lock(&smb->lock);
- ret = smb->usb_online || smb->mains_online;
- mutex_unlock(&smb->lock);
-
- return ret;
+ return smb->usb_online || smb->mains_online;
}
/**
@@ -302,19 +359,18 @@ static int smb347_charging_set(struct smb347_charger *smb, bool enable)
{
int ret = 0;
- if (smb->pdata->enable_control != SMB347_CHG_ENABLE_SW) {
+ if (smb->enable_control != SMB3XX_CHG_ENABLE_SW) {
dev_dbg(smb->dev, "charging enable/disable in SW disabled\n");
return 0;
}
- mutex_lock(&smb->lock);
if (smb->charging_enabled != enable) {
ret = regmap_update_bits(smb->regmap, CMD_A, CMD_A_CHG_ENABLED,
enable ? CMD_A_CHG_ENABLED : 0);
if (!ret)
smb->charging_enabled = enable;
}
- mutex_unlock(&smb->lock);
+
return ret;
}
@@ -352,11 +408,12 @@ static int smb347_start_stop_charging(struct smb347_charger *smb)
static int smb347_set_charge_current(struct smb347_charger *smb)
{
+ unsigned int id = smb->id;
int ret;
- if (smb->pdata->max_charge_current) {
- ret = current_to_hw(fcc_tbl, ARRAY_SIZE(fcc_tbl),
- smb->pdata->max_charge_current);
+ if (smb->max_charge_current) {
+ ret = current_to_hw(fcc_tbl[id], ARRAY_SIZE(fcc_tbl[id]),
+ smb->max_charge_current);
if (ret < 0)
return ret;
@@ -367,9 +424,9 @@ static int smb347_set_charge_current(struct smb347_charger *smb)
return ret;
}
- if (smb->pdata->pre_charge_current) {
- ret = current_to_hw(pcc_tbl, ARRAY_SIZE(pcc_tbl),
- smb->pdata->pre_charge_current);
+ if (smb->pre_charge_current) {
+ ret = current_to_hw(pcc_tbl[id], ARRAY_SIZE(pcc_tbl[id]),
+ smb->pre_charge_current);
if (ret < 0)
return ret;
@@ -380,9 +437,9 @@ static int smb347_set_charge_current(struct smb347_charger *smb)
return ret;
}
- if (smb->pdata->termination_current) {
- ret = current_to_hw(tc_tbl, ARRAY_SIZE(tc_tbl),
- smb->pdata->termination_current);
+ if (smb->termination_current) {
+ ret = current_to_hw(tc_tbl[id], ARRAY_SIZE(tc_tbl[id]),
+ smb->termination_current);
if (ret < 0)
return ret;
@@ -397,11 +454,12 @@ static int smb347_set_charge_current(struct smb347_charger *smb)
static int smb347_set_current_limits(struct smb347_charger *smb)
{
+ unsigned int id = smb->id;
int ret;
- if (smb->pdata->mains_current_limit) {
- ret = current_to_hw(icl_tbl, ARRAY_SIZE(icl_tbl),
- smb->pdata->mains_current_limit);
+ if (smb->mains_current_limit) {
+ ret = current_to_hw(icl_tbl[id], ARRAY_SIZE(icl_tbl[id]),
+ smb->mains_current_limit);
if (ret < 0)
return ret;
@@ -412,9 +470,9 @@ static int smb347_set_current_limits(struct smb347_charger *smb)
return ret;
}
- if (smb->pdata->usb_hc_current_limit) {
- ret = current_to_hw(icl_tbl, ARRAY_SIZE(icl_tbl),
- smb->pdata->usb_hc_current_limit);
+ if (smb->usb_hc_current_limit) {
+ ret = current_to_hw(icl_tbl[id], ARRAY_SIZE(icl_tbl[id]),
+ smb->usb_hc_current_limit);
if (ret < 0)
return ret;
@@ -431,8 +489,8 @@ static int smb347_set_voltage_limits(struct smb347_charger *smb)
{
int ret;
- if (smb->pdata->pre_to_fast_voltage) {
- ret = smb->pdata->pre_to_fast_voltage;
+ if (smb->pre_to_fast_voltage) {
+ ret = smb->pre_to_fast_voltage;
/* uV */
ret = clamp_val(ret, 2400000, 3000000) - 2400000;
@@ -445,8 +503,8 @@ static int smb347_set_voltage_limits(struct smb347_charger *smb)
return ret;
}
- if (smb->pdata->max_charge_voltage) {
- ret = smb->pdata->max_charge_voltage;
+ if (smb->max_charge_voltage) {
+ ret = smb->max_charge_voltage;
/* uV */
ret = clamp_val(ret, 3500000, 4500000) - 3500000;
@@ -463,12 +521,13 @@ static int smb347_set_voltage_limits(struct smb347_charger *smb)
static int smb347_set_temp_limits(struct smb347_charger *smb)
{
+ unsigned int id = smb->id;
bool enable_therm_monitor = false;
int ret = 0;
int val;
- if (smb->pdata->chip_temp_threshold) {
- val = smb->pdata->chip_temp_threshold;
+ if (smb->chip_temp_threshold) {
+ val = smb->chip_temp_threshold;
/* degree C */
val = clamp_val(val, 100, 130) - 100;
@@ -481,8 +540,8 @@ static int smb347_set_temp_limits(struct smb347_charger *smb)
return ret;
}
- if (smb->pdata->soft_cold_temp_limit != SMB347_TEMP_USE_DEFAULT) {
- val = smb->pdata->soft_cold_temp_limit;
+ if (smb->soft_cold_temp_limit != SMB3XX_TEMP_USE_DEFAULT) {
+ val = smb->soft_cold_temp_limit;
val = clamp_val(val, 0, 15);
val /= 5;
@@ -498,8 +557,8 @@ static int smb347_set_temp_limits(struct smb347_charger *smb)
enable_therm_monitor = true;
}
- if (smb->pdata->soft_hot_temp_limit != SMB347_TEMP_USE_DEFAULT) {
- val = smb->pdata->soft_hot_temp_limit;
+ if (smb->soft_hot_temp_limit != SMB3XX_TEMP_USE_DEFAULT) {
+ val = smb->soft_hot_temp_limit;
val = clamp_val(val, 40, 55) - 40;
val /= 5;
@@ -513,8 +572,8 @@ static int smb347_set_temp_limits(struct smb347_charger *smb)
enable_therm_monitor = true;
}
- if (smb->pdata->hard_cold_temp_limit != SMB347_TEMP_USE_DEFAULT) {
- val = smb->pdata->hard_cold_temp_limit;
+ if (smb->hard_cold_temp_limit != SMB3XX_TEMP_USE_DEFAULT) {
+ val = smb->hard_cold_temp_limit;
val = clamp_val(val, -5, 10) + 5;
val /= 5;
@@ -530,8 +589,8 @@ static int smb347_set_temp_limits(struct smb347_charger *smb)
enable_therm_monitor = true;
}
- if (smb->pdata->hard_hot_temp_limit != SMB347_TEMP_USE_DEFAULT) {
- val = smb->pdata->hard_hot_temp_limit;
+ if (smb->hard_hot_temp_limit != SMB3XX_TEMP_USE_DEFAULT) {
+ val = smb->hard_hot_temp_limit;
val = clamp_val(val, 50, 65) - 50;
val /= 5;
@@ -562,16 +621,16 @@ static int smb347_set_temp_limits(struct smb347_charger *smb)
return ret;
}
- if (smb->pdata->suspend_on_hard_temp_limit) {
+ if (smb->suspend_on_hard_temp_limit) {
ret = regmap_update_bits(smb->regmap, CFG_SYSOK,
CFG_SYSOK_SUSPEND_HARD_LIMIT_DISABLED, 0);
if (ret < 0)
return ret;
}
- if (smb->pdata->soft_temp_limit_compensation !=
- SMB347_SOFT_TEMP_COMPENSATE_DEFAULT) {
- val = smb->pdata->soft_temp_limit_compensation & 0x3;
+ if (smb->soft_temp_limit_compensation !=
+ SMB3XX_SOFT_TEMP_COMPENSATE_DEFAULT) {
+ val = smb->soft_temp_limit_compensation & 0x3;
ret = regmap_update_bits(smb->regmap, CFG_THERM,
CFG_THERM_SOFT_HOT_COMPENSATION_MASK,
@@ -586,9 +645,9 @@ static int smb347_set_temp_limits(struct smb347_charger *smb)
return ret;
}
- if (smb->pdata->charge_current_compensation) {
- val = current_to_hw(ccc_tbl, ARRAY_SIZE(ccc_tbl),
- smb->pdata->charge_current_compensation);
+ if (smb->charge_current_compensation) {
+ val = current_to_hw(ccc_tbl[id], ARRAY_SIZE(ccc_tbl[id]),
+ smb->charge_current_compensation);
if (val < 0)
return val;
@@ -647,7 +706,7 @@ static int smb347_hw_init(struct smb347_charger *smb)
goto fail;
/* If USB charging is disabled we put the USB in suspend mode */
- if (!smb->pdata->use_usb) {
+ if (!smb->use_usb) {
ret = regmap_update_bits(smb->regmap, CMD_A,
CMD_A_SUSPEND_ENABLED,
CMD_A_SUSPEND_ENABLED);
@@ -660,7 +719,7 @@ static int smb347_hw_init(struct smb347_charger *smb)
* support for driving VBUS. Otherwise we disable it.
*/
ret = regmap_update_bits(smb->regmap, CFG_OTHER, CFG_OTHER_RID_MASK,
- smb->pdata->use_usb_otg ? CFG_OTHER_RID_ENABLED_AUTO_OTG : 0);
+ smb->use_usb_otg ? CFG_OTHER_RID_ENABLED_AUTO_OTG : 0);
if (ret < 0)
goto fail;
@@ -669,11 +728,11 @@ static int smb347_hw_init(struct smb347_charger *smb)
* command register unless pin control is specified in the platform
* data.
*/
- switch (smb->pdata->enable_control) {
- case SMB347_CHG_ENABLE_PIN_ACTIVE_LOW:
+ switch (smb->enable_control) {
+ case SMB3XX_CHG_ENABLE_PIN_ACTIVE_LOW:
val = CFG_PIN_EN_CTRL_ACTIVE_LOW;
break;
- case SMB347_CHG_ENABLE_PIN_ACTIVE_HIGH:
+ case SMB3XX_CHG_ENABLE_PIN_ACTIVE_HIGH:
val = CFG_PIN_EN_CTRL_ACTIVE_HIGH;
break;
default:
@@ -742,7 +801,10 @@ static irqreturn_t smb347_interrupt(int irq, void *data)
*/
if (stat_c & STAT_C_CHARGER_ERROR) {
dev_err(smb->dev, "charging stopped due to charger error\n");
- power_supply_changed(smb->battery);
+ if (smb->use_mains)
+ power_supply_changed(smb->mains);
+ if (smb->use_usb)
+ power_supply_changed(smb->usb);
handled = true;
}
@@ -752,8 +814,12 @@ static irqreturn_t smb347_interrupt(int irq, void *data)
* disabled by the hardware.
*/
if (irqstat_c & (IRQSTAT_C_TERMINATION_IRQ | IRQSTAT_C_TAPER_IRQ)) {
- if (irqstat_c & IRQSTAT_C_TERMINATION_STAT)
- power_supply_changed(smb->battery);
+ if (irqstat_c & IRQSTAT_C_TERMINATION_STAT) {
+ if (smb->use_mains)
+ power_supply_changed(smb->mains);
+ if (smb->use_usb)
+ power_supply_changed(smb->usb);
+ }
dev_dbg(smb->dev, "going to HW maintenance mode\n");
handled = true;
}
@@ -767,7 +833,10 @@ static irqreturn_t smb347_interrupt(int irq, void *data)
if (irqstat_d & IRQSTAT_D_CHARGE_TIMEOUT_STAT)
dev_warn(smb->dev, "charging stopped due to timeout\n");
- power_supply_changed(smb->battery);
+ if (smb->use_mains)
+ power_supply_changed(smb->mains);
+ if (smb->use_usb)
+ power_supply_changed(smb->usb);
handled = true;
}
@@ -778,9 +847,9 @@ static irqreturn_t smb347_interrupt(int irq, void *data)
if (irqstat_e & (IRQSTAT_E_USBIN_UV_IRQ | IRQSTAT_E_DCIN_UV_IRQ)) {
if (smb347_update_ps_status(smb) > 0) {
smb347_start_stop_charging(smb);
- if (smb->pdata->use_mains)
+ if (smb->use_mains)
power_supply_changed(smb->mains);
- if (smb->pdata->use_usb)
+ if (smb->use_usb)
power_supply_changed(smb->usb);
}
handled = true;
@@ -835,22 +904,17 @@ static inline int smb347_irq_disable(struct smb347_charger *smb)
static int smb347_irq_init(struct smb347_charger *smb,
struct i2c_client *client)
{
- const struct smb347_charger_platform_data *pdata = smb->pdata;
- int ret, irq = gpio_to_irq(pdata->irq_gpio);
-
- ret = gpio_request_one(pdata->irq_gpio, GPIOF_IN, client->name);
- if (ret < 0)
- goto fail;
+ int ret;
- ret = request_threaded_irq(irq, NULL, smb347_interrupt,
- IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
- client->name, smb);
+ ret = devm_request_threaded_irq(smb->dev, client->irq, NULL,
+ smb347_interrupt, IRQF_ONESHOT,
+ client->name, smb);
if (ret < 0)
- goto fail_gpio;
+ return ret;
ret = smb347_set_writable(smb, true);
if (ret < 0)
- goto fail_irq;
+ return ret;
/*
* Configure the STAT output to be suitable for interrupts: disable
@@ -860,20 +924,10 @@ static int smb347_irq_init(struct smb347_charger *smb,
CFG_STAT_ACTIVE_HIGH | CFG_STAT_DISABLED,
CFG_STAT_DISABLED);
if (ret < 0)
- goto fail_readonly;
+ client->irq = 0;
smb347_set_writable(smb, false);
- client->irq = irq;
- return 0;
-fail_readonly:
- smb347_set_writable(smb, false);
-fail_irq:
- free_irq(irq, smb);
-fail_gpio:
- gpio_free(pdata->irq_gpio);
-fail:
- client->irq = 0;
return ret;
}
@@ -883,6 +937,7 @@ fail:
*/
static int get_const_charge_current(struct smb347_charger *smb)
{
+ unsigned int id = smb->id;
int ret, intval;
unsigned int v;
@@ -898,10 +953,12 @@ static int get_const_charge_current(struct smb347_charger *smb)
* and we can detect which table to use from bit 5.
*/
if (v & 0x20) {
- intval = hw_to_current(fcc_tbl, ARRAY_SIZE(fcc_tbl), v & 7);
+ intval = hw_to_current(fcc_tbl[id],
+ ARRAY_SIZE(fcc_tbl[id]), v & 7);
} else {
v >>= 3;
- intval = hw_to_current(pcc_tbl, ARRAY_SIZE(pcc_tbl), v & 7);
+ intval = hw_to_current(pcc_tbl[id],
+ ARRAY_SIZE(pcc_tbl[id]), v & 7);
}
return intval;
@@ -932,95 +989,19 @@ static int get_const_charge_voltage(struct smb347_charger *smb)
return intval;
}
-static int smb347_mains_get_property(struct power_supply *psy,
- enum power_supply_property prop,
- union power_supply_propval *val)
-{
- struct smb347_charger *smb = power_supply_get_drvdata(psy);
- int ret;
-
- switch (prop) {
- case POWER_SUPPLY_PROP_ONLINE:
- val->intval = smb->mains_online;
- break;
-
- case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE:
- ret = get_const_charge_voltage(smb);
- if (ret < 0)
- return ret;
- else
- val->intval = ret;
- break;
-
- case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT:
- ret = get_const_charge_current(smb);
- if (ret < 0)
- return ret;
- else
- val->intval = ret;
- break;
-
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static enum power_supply_property smb347_mains_properties[] = {
- POWER_SUPPLY_PROP_ONLINE,
- POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT,
- POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE,
-};
-
-static int smb347_usb_get_property(struct power_supply *psy,
- enum power_supply_property prop,
- union power_supply_propval *val)
-{
- struct smb347_charger *smb = power_supply_get_drvdata(psy);
- int ret;
-
- switch (prop) {
- case POWER_SUPPLY_PROP_ONLINE:
- val->intval = smb->usb_online;
- break;
-
- case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE:
- ret = get_const_charge_voltage(smb);
- if (ret < 0)
- return ret;
- else
- val->intval = ret;
- break;
-
- case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT:
- ret = get_const_charge_current(smb);
- if (ret < 0)
- return ret;
- else
- val->intval = ret;
- break;
-
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static enum power_supply_property smb347_usb_properties[] = {
- POWER_SUPPLY_PROP_ONLINE,
- POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT,
- POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE,
-};
-
-static int smb347_get_charging_status(struct smb347_charger *smb)
+static int smb347_get_charging_status(struct smb347_charger *smb,
+ struct power_supply *psy)
{
int ret, status;
unsigned int val;
- if (!smb347_is_ps_online(smb))
- return POWER_SUPPLY_STATUS_DISCHARGING;
+ if (psy->desc->type == POWER_SUPPLY_TYPE_USB) {
+ if (!smb->usb_online)
+ return POWER_SUPPLY_STATUS_DISCHARGING;
+ } else {
+ if (!smb->mains_online)
+ return POWER_SUPPLY_STATUS_DISCHARGING;
+ }
ret = regmap_read(smb->regmap, STAT_C, &val);
if (ret < 0)
@@ -1059,29 +1040,29 @@ static int smb347_get_charging_status(struct smb347_charger *smb)
return status;
}
-static int smb347_battery_get_property(struct power_supply *psy,
- enum power_supply_property prop,
- union power_supply_propval *val)
+static int smb347_get_property_locked(struct power_supply *psy,
+ enum power_supply_property prop,
+ union power_supply_propval *val)
{
struct smb347_charger *smb = power_supply_get_drvdata(psy);
- const struct smb347_charger_platform_data *pdata = smb->pdata;
int ret;
- ret = smb347_update_ps_status(smb);
- if (ret < 0)
- return ret;
-
switch (prop) {
case POWER_SUPPLY_PROP_STATUS:
- ret = smb347_get_charging_status(smb);
+ ret = smb347_get_charging_status(smb, psy);
if (ret < 0)
return ret;
val->intval = ret;
break;
case POWER_SUPPLY_PROP_CHARGE_TYPE:
- if (!smb347_is_ps_online(smb))
- return -ENODATA;
+ if (psy->desc->type == POWER_SUPPLY_TYPE_USB) {
+ if (!smb->usb_online)
+ return -ENODATA;
+ } else {
+ if (!smb->mains_online)
+ return -ENODATA;
+ }
/*
* We handle trickle and pre-charging the same, and taper
@@ -1100,24 +1081,25 @@ static int smb347_battery_get_property(struct power_supply *psy,
}
break;
- case POWER_SUPPLY_PROP_TECHNOLOGY:
- val->intval = pdata->battery_info.technology;
- break;
-
- case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
- val->intval = pdata->battery_info.voltage_min_design;
- break;
-
- case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
- val->intval = pdata->battery_info.voltage_max_design;
+ case POWER_SUPPLY_PROP_ONLINE:
+ if (psy->desc->type == POWER_SUPPLY_TYPE_USB)
+ val->intval = smb->usb_online;
+ else
+ val->intval = smb->mains_online;
break;
- case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
- val->intval = pdata->battery_info.charge_full_design;
+ case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE:
+ ret = get_const_charge_voltage(smb);
+ if (ret < 0)
+ return ret;
+ val->intval = ret;
break;
- case POWER_SUPPLY_PROP_MODEL_NAME:
- val->strval = pdata->battery_info.name;
+ case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT:
+ ret = get_const_charge_current(smb);
+ if (ret < 0)
+ return ret;
+ val->intval = ret;
break;
default:
@@ -1127,14 +1109,27 @@ static int smb347_battery_get_property(struct power_supply *psy,
return 0;
}
-static enum power_supply_property smb347_battery_properties[] = {
+static int smb347_get_property(struct power_supply *psy,
+ enum power_supply_property prop,
+ union power_supply_propval *val)
+{
+ struct smb347_charger *smb = power_supply_get_drvdata(psy);
+ struct i2c_client *client = to_i2c_client(smb->dev);
+ int ret;
+
+ disable_irq(client->irq);
+ ret = smb347_get_property_locked(psy, prop, val);
+ enable_irq(client->irq);
+
+ return ret;
+}
+
+static enum power_supply_property smb347_properties[] = {
POWER_SUPPLY_PROP_STATUS,
POWER_SUPPLY_PROP_CHARGE_TYPE,
- POWER_SUPPLY_PROP_TECHNOLOGY,
- POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
- POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN,
- POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN,
- POWER_SUPPLY_PROP_MODEL_NAME,
+ POWER_SUPPLY_PROP_ONLINE,
+ POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT,
+ POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE,
};
static bool smb347_volatile_reg(struct device *dev, unsigned int reg)
@@ -1180,6 +1175,96 @@ static bool smb347_readable_reg(struct device *dev, unsigned int reg)
return smb347_volatile_reg(dev, reg);
}
+static void smb347_dt_parse_dev_info(struct smb347_charger *smb)
+{
+ struct device *dev = smb->dev;
+
+ smb->soft_temp_limit_compensation =
+ SMB3XX_SOFT_TEMP_COMPENSATE_DEFAULT;
+ /*
+ * These properties come from the battery info, still we need to
+ * pre-initialize the values. See smb347_get_battery_info() below.
+ */
+ smb->soft_cold_temp_limit = SMB3XX_TEMP_USE_DEFAULT;
+ smb->hard_cold_temp_limit = SMB3XX_TEMP_USE_DEFAULT;
+ smb->soft_hot_temp_limit = SMB3XX_TEMP_USE_DEFAULT;
+ smb->hard_hot_temp_limit = SMB3XX_TEMP_USE_DEFAULT;
+
+ /* Charging constraints */
+ device_property_read_u32(dev, "summit,fast-voltage-threshold-microvolt",
+ &smb->pre_to_fast_voltage);
+ device_property_read_u32(dev, "summit,mains-current-limit-microamp",
+ &smb->mains_current_limit);
+ device_property_read_u32(dev, "summit,usb-current-limit-microamp",
+ &smb->usb_hc_current_limit);
+
+ /* For thermometer monitoring */
+ device_property_read_u32(dev, "summit,chip-temperature-threshold-celsius",
+ &smb->chip_temp_threshold);
+ device_property_read_u32(dev, "summit,soft-compensation-method",
+ &smb->soft_temp_limit_compensation);
+ device_property_read_u32(dev, "summit,charge-current-compensation-microamp",
+ &smb->charge_current_compensation);
+
+ /* Supported charging mode */
+ smb->use_mains = device_property_read_bool(dev, "summit,enable-mains-charging");
+ smb->use_usb = device_property_read_bool(dev, "summit,enable-usb-charging");
+ smb->use_usb_otg = device_property_read_bool(dev, "summit,enable-otg-charging");
+
+ /* Select charging control */
+ device_property_read_u32(dev, "summit,enable-charge-control",
+ &smb->enable_control);
+}
+
+static int smb347_get_battery_info(struct smb347_charger *smb)
+{
+ struct power_supply_battery_info info = {};
+ struct power_supply *supply;
+ int err;
+
+ if (smb->mains)
+ supply = smb->mains;
+ else
+ supply = smb->usb;
+
+ err = power_supply_get_battery_info(supply, &info);
+ if (err == -ENXIO || err == -ENODEV)
+ return 0;
+ if (err)
+ return err;
+
+ if (info.constant_charge_current_max_ua != -EINVAL)
+ smb->max_charge_current = info.constant_charge_current_max_ua;
+
+ if (info.constant_charge_voltage_max_uv != -EINVAL)
+ smb->max_charge_voltage = info.constant_charge_voltage_max_uv;
+
+ if (info.precharge_current_ua != -EINVAL)
+ smb->pre_charge_current = info.precharge_current_ua;
+
+ if (info.charge_term_current_ua != -EINVAL)
+ smb->termination_current = info.charge_term_current_ua;
+
+ if (info.temp_alert_min != INT_MIN)
+ smb->soft_cold_temp_limit = info.temp_alert_min;
+
+ if (info.temp_alert_max != INT_MAX)
+ smb->soft_hot_temp_limit = info.temp_alert_max;
+
+ if (info.temp_min != INT_MIN)
+ smb->hard_cold_temp_limit = info.temp_min;
+
+ if (info.temp_max != INT_MAX)
+ smb->hard_hot_temp_limit = info.temp_max;
+
+ /* Suspend when battery temperature is outside hard limits */
+ if (smb->hard_cold_temp_limit != SMB3XX_TEMP_USE_DEFAULT ||
+ smb->hard_hot_temp_limit != SMB3XX_TEMP_USE_DEFAULT)
+ smb->suspend_on_hard_temp_limit = true;
+
+ return 0;
+}
+
static const struct regmap_config smb347_regmap = {
.reg_bits = 8,
.val_bits = 8,
@@ -1191,98 +1276,71 @@ static const struct regmap_config smb347_regmap = {
static const struct power_supply_desc smb347_mains_desc = {
.name = "smb347-mains",
.type = POWER_SUPPLY_TYPE_MAINS,
- .get_property = smb347_mains_get_property,
- .properties = smb347_mains_properties,
- .num_properties = ARRAY_SIZE(smb347_mains_properties),
+ .get_property = smb347_get_property,
+ .properties = smb347_properties,
+ .num_properties = ARRAY_SIZE(smb347_properties),
};
static const struct power_supply_desc smb347_usb_desc = {
.name = "smb347-usb",
.type = POWER_SUPPLY_TYPE_USB,
- .get_property = smb347_usb_get_property,
- .properties = smb347_usb_properties,
- .num_properties = ARRAY_SIZE(smb347_usb_properties),
-};
-
-static const struct power_supply_desc smb347_battery_desc = {
- .name = "smb347-battery",
- .type = POWER_SUPPLY_TYPE_BATTERY,
- .get_property = smb347_battery_get_property,
- .properties = smb347_battery_properties,
- .num_properties = ARRAY_SIZE(smb347_battery_properties),
+ .get_property = smb347_get_property,
+ .properties = smb347_properties,
+ .num_properties = ARRAY_SIZE(smb347_properties),
};
static int smb347_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
- static char *battery[] = { "smb347-battery" };
- const struct smb347_charger_platform_data *pdata;
- struct power_supply_config mains_usb_cfg = {}, battery_cfg = {};
+ struct power_supply_config mains_usb_cfg = {};
struct device *dev = &client->dev;
struct smb347_charger *smb;
int ret;
- pdata = dev->platform_data;
- if (!pdata)
- return -EINVAL;
-
- if (!pdata->use_mains && !pdata->use_usb)
- return -EINVAL;
-
smb = devm_kzalloc(dev, sizeof(*smb), GFP_KERNEL);
if (!smb)
return -ENOMEM;
-
+ smb->dev = &client->dev;
+ smb->id = id->driver_data;
i2c_set_clientdata(client, smb);
- mutex_init(&smb->lock);
- smb->dev = &client->dev;
- smb->pdata = pdata;
+ smb347_dt_parse_dev_info(smb);
+ if (!smb->use_mains && !smb->use_usb)
+ return -EINVAL;
smb->regmap = devm_regmap_init_i2c(client, &smb347_regmap);
if (IS_ERR(smb->regmap))
return PTR_ERR(smb->regmap);
- ret = smb347_hw_init(smb);
- if (ret < 0)
- return ret;
-
- mains_usb_cfg.supplied_to = battery;
- mains_usb_cfg.num_supplicants = ARRAY_SIZE(battery);
mains_usb_cfg.drv_data = smb;
- if (smb->pdata->use_mains) {
- smb->mains = power_supply_register(dev, &smb347_mains_desc,
- &mains_usb_cfg);
+ mains_usb_cfg.of_node = dev->of_node;
+ if (smb->use_mains) {
+ smb->mains = devm_power_supply_register(dev, &smb347_mains_desc,
+ &mains_usb_cfg);
if (IS_ERR(smb->mains))
return PTR_ERR(smb->mains);
}
- if (smb->pdata->use_usb) {
- smb->usb = power_supply_register(dev, &smb347_usb_desc,
- &mains_usb_cfg);
- if (IS_ERR(smb->usb)) {
- if (smb->pdata->use_mains)
- power_supply_unregister(smb->mains);
+ if (smb->use_usb) {
+ smb->usb = devm_power_supply_register(dev, &smb347_usb_desc,
+ &mains_usb_cfg);
+ if (IS_ERR(smb->usb))
return PTR_ERR(smb->usb);
- }
}
- battery_cfg.drv_data = smb;
- smb->battery = power_supply_register(dev, &smb347_battery_desc,
- &battery_cfg);
- if (IS_ERR(smb->battery)) {
- if (smb->pdata->use_usb)
- power_supply_unregister(smb->usb);
- if (smb->pdata->use_mains)
- power_supply_unregister(smb->mains);
- return PTR_ERR(smb->battery);
- }
+ ret = smb347_get_battery_info(smb);
+ if (ret)
+ return ret;
+
+ ret = smb347_hw_init(smb);
+ if (ret < 0)
+ return ret;
/*
* Interrupt pin is optional. If it is connected, we setup the
* interrupt support here.
*/
- if (pdata->irq_gpio >= 0) {
+ if (client->irq) {
ret = smb347_irq_init(smb, client);
if (ret < 0) {
dev_warn(dev, "failed to initialize IRQ: %d\n", ret);
@@ -1299,29 +1357,31 @@ static int smb347_remove(struct i2c_client *client)
{
struct smb347_charger *smb = i2c_get_clientdata(client);
- if (client->irq) {
+ if (client->irq)
smb347_irq_disable(smb);
- free_irq(client->irq, smb);
- gpio_free(smb->pdata->irq_gpio);
- }
-
- power_supply_unregister(smb->battery);
- if (smb->pdata->use_usb)
- power_supply_unregister(smb->usb);
- if (smb->pdata->use_mains)
- power_supply_unregister(smb->mains);
return 0;
}
static const struct i2c_device_id smb347_id[] = {
- { "smb347", 0 },
- { }
+ { "smb345", SMB345 },
+ { "smb347", SMB347 },
+ { "smb358", SMB358 },
+ { },
};
MODULE_DEVICE_TABLE(i2c, smb347_id);
+static const struct of_device_id smb3xx_of_match[] = {
+ { .compatible = "summit,smb345" },
+ { .compatible = "summit,smb347" },
+ { .compatible = "summit,smb358" },
+ { },
+};
+MODULE_DEVICE_TABLE(of, smb3xx_of_match);
+
static struct i2c_driver smb347_driver = {
.driver = {
.name = "smb347",
+ .of_match_table = smb3xx_of_match,
},
.probe = smb347_probe,
.remove = smb347_remove,
diff --git a/drivers/power/supply/test_power.c b/drivers/power/supply/test_power.c
index 04acd76bbaa1..5f510ddc946d 100644
--- a/drivers/power/supply/test_power.c
+++ b/drivers/power/supply/test_power.c
@@ -352,8 +352,8 @@ static int param_set_ac_online(const char *key, const struct kernel_param *kp)
static int param_get_ac_online(char *buffer, const struct kernel_param *kp)
{
- strcpy(buffer, map_get_key(map_ac_online, ac_online, "unknown"));
- return strlen(buffer);
+ return sprintf(buffer, "%s\n",
+ map_get_key(map_ac_online, ac_online, "unknown"));
}
static int param_set_usb_online(const char *key, const struct kernel_param *kp)
@@ -365,8 +365,8 @@ static int param_set_usb_online(const char *key, const struct kernel_param *kp)
static int param_get_usb_online(char *buffer, const struct kernel_param *kp)
{
- strcpy(buffer, map_get_key(map_ac_online, usb_online, "unknown"));
- return strlen(buffer);
+ return sprintf(buffer, "%s\n",
+ map_get_key(map_ac_online, usb_online, "unknown"));
}
static int param_set_battery_status(const char *key,
@@ -379,8 +379,8 @@ static int param_set_battery_status(const char *key,
static int param_get_battery_status(char *buffer, const struct kernel_param *kp)
{
- strcpy(buffer, map_get_key(map_status, battery_status, "unknown"));
- return strlen(buffer);
+ return sprintf(buffer, "%s\n",
+ map_get_key(map_ac_online, battery_status, "unknown"));
}
static int param_set_battery_health(const char *key,
@@ -393,8 +393,8 @@ static int param_set_battery_health(const char *key,
static int param_get_battery_health(char *buffer, const struct kernel_param *kp)
{
- strcpy(buffer, map_get_key(map_health, battery_health, "unknown"));
- return strlen(buffer);
+ return sprintf(buffer, "%s\n",
+ map_get_key(map_ac_online, battery_health, "unknown"));
}
static int param_set_battery_present(const char *key,
@@ -408,8 +408,8 @@ static int param_set_battery_present(const char *key,
static int param_get_battery_present(char *buffer,
const struct kernel_param *kp)
{
- strcpy(buffer, map_get_key(map_present, battery_present, "unknown"));
- return strlen(buffer);
+ return sprintf(buffer, "%s\n",
+ map_get_key(map_ac_online, battery_present, "unknown"));
}
static int param_set_battery_technology(const char *key,
@@ -424,9 +424,9 @@ static int param_set_battery_technology(const char *key,
static int param_get_battery_technology(char *buffer,
const struct kernel_param *kp)
{
- strcpy(buffer,
- map_get_key(map_technology, battery_technology, "unknown"));
- return strlen(buffer);
+ return sprintf(buffer, "%s\n",
+ map_get_key(map_ac_online, battery_technology,
+ "unknown"));
}
static int param_set_battery_capacity(const char *key,
diff --git a/drivers/power/supply/ucs1002_power.c b/drivers/power/supply/ucs1002_power.c
index cdb9a23d825f..ef673ec3db56 100644
--- a/drivers/power/supply/ucs1002_power.c
+++ b/drivers/power/supply/ucs1002_power.c
@@ -38,6 +38,7 @@
/* Interrupt Status */
#define UCS1002_REG_INTERRUPT_STATUS 0x10
+# define F_ERR BIT(7)
# define F_DISCHARGE_ERR BIT(6)
# define F_RESET BIT(5)
# define F_MIN_KEEP_OUT BIT(4)
@@ -103,6 +104,9 @@ struct ucs1002_info {
struct regulator_dev *rdev;
bool present;
bool output_disable;
+ struct delayed_work health_poll;
+ int health;
+
};
static enum power_supply_property ucs1002_props[] = {
@@ -362,32 +366,6 @@ static int ucs1002_get_usb_type(struct ucs1002_info *info,
return 0;
}
-static int ucs1002_get_health(struct ucs1002_info *info,
- union power_supply_propval *val)
-{
- unsigned int reg;
- int ret, health;
-
- ret = regmap_read(info->regmap, UCS1002_REG_INTERRUPT_STATUS, &reg);
- if (ret)
- return ret;
-
- if (reg & F_TSD)
- health = POWER_SUPPLY_HEALTH_OVERHEAT;
- else if (reg & (F_OVER_VOLT | F_BACK_VOLT))
- health = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
- else if (reg & F_OVER_ILIM)
- health = POWER_SUPPLY_HEALTH_OVERCURRENT;
- else if (reg & (F_DISCHARGE_ERR | F_MIN_KEEP_OUT))
- health = POWER_SUPPLY_HEALTH_UNSPEC_FAILURE;
- else
- health = POWER_SUPPLY_HEALTH_GOOD;
-
- val->intval = health;
-
- return 0;
-}
-
static int ucs1002_get_property(struct power_supply *psy,
enum power_supply_property psp,
union power_supply_propval *val)
@@ -406,7 +384,7 @@ static int ucs1002_get_property(struct power_supply *psy,
case POWER_SUPPLY_PROP_USB_TYPE:
return ucs1002_get_usb_type(info, val);
case POWER_SUPPLY_PROP_HEALTH:
- return ucs1002_get_health(info, val);
+ return val->intval = info->health;
case POWER_SUPPLY_PROP_PRESENT:
val->intval = info->present;
return 0;
@@ -458,6 +436,38 @@ static const struct power_supply_desc ucs1002_charger_desc = {
.num_properties = ARRAY_SIZE(ucs1002_props),
};
+static void ucs1002_health_poll(struct work_struct *work)
+{
+ struct ucs1002_info *info = container_of(work, struct ucs1002_info,
+ health_poll.work);
+ int ret;
+ u32 reg;
+
+ ret = regmap_read(info->regmap, UCS1002_REG_INTERRUPT_STATUS, &reg);
+ if (ret)
+ return;
+
+ /* bad health and no status change, just schedule us again in a while */
+ if ((reg & F_ERR) && info->health != POWER_SUPPLY_HEALTH_GOOD) {
+ schedule_delayed_work(&info->health_poll,
+ msecs_to_jiffies(2000));
+ return;
+ }
+
+ if (reg & F_TSD)
+ info->health = POWER_SUPPLY_HEALTH_OVERHEAT;
+ else if (reg & (F_OVER_VOLT | F_BACK_VOLT))
+ info->health = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
+ else if (reg & F_OVER_ILIM)
+ info->health = POWER_SUPPLY_HEALTH_OVERCURRENT;
+ else if (reg & (F_DISCHARGE_ERR | F_MIN_KEEP_OUT))
+ info->health = POWER_SUPPLY_HEALTH_UNSPEC_FAILURE;
+ else
+ info->health = POWER_SUPPLY_HEALTH_GOOD;
+
+ sysfs_notify(&info->charger->dev.kobj, NULL, "health");
+}
+
static irqreturn_t ucs1002_charger_irq(int irq, void *data)
{
int ret, regval;
@@ -484,7 +494,7 @@ static irqreturn_t ucs1002_alert_irq(int irq, void *data)
{
struct ucs1002_info *info = data;
- power_supply_changed(info->charger);
+ mod_delayed_work(system_wq, &info->health_poll, 0);
return IRQ_HANDLED;
}
@@ -632,6 +642,9 @@ static int ucs1002_probe(struct i2c_client *client,
return ret;
}
+ info->health = POWER_SUPPLY_HEALTH_GOOD;
+ INIT_DELAYED_WORK(&info->health_poll, ucs1002_health_poll);
+
if (irq_a_det > 0) {
ret = devm_request_threaded_irq(dev, irq_a_det, NULL,
ucs1002_charger_irq,
@@ -645,10 +658,8 @@ static int ucs1002_probe(struct i2c_client *client,
}
if (irq_alert > 0) {
- ret = devm_request_threaded_irq(dev, irq_alert, NULL,
- ucs1002_alert_irq,
- IRQF_ONESHOT,
- "ucs1002-alert", info);
+ ret = devm_request_irq(dev, irq_alert, ucs1002_alert_irq,
+ 0,"ucs1002-alert", info);
if (ret) {
dev_err(dev, "Failed to request ALERT threaded irq: %d\n",
ret);
diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c
index a30342942e26..94331d999d27 100644
--- a/drivers/rapidio/devices/rio_mport_cdev.c
+++ b/drivers/rapidio/devices/rio_mport_cdev.c
@@ -871,15 +871,16 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode,
rmcd_error("pin_user_pages_fast err=%ld",
pinned);
nr_pages = 0;
- } else
+ } else {
rmcd_error("pinned %ld out of %ld pages",
pinned, nr_pages);
+ /*
+ * Set nr_pages up to mean "how many pages to unpin, in
+ * the error handler:
+ */
+ nr_pages = pinned;
+ }
ret = -EFAULT;
- /*
- * Set nr_pages up to mean "how many pages to unpin, in
- * the error handler:
- */
- nr_pages = pinned;
goto err_pg;
}
@@ -1679,6 +1680,7 @@ static int rio_mport_add_riodev(struct mport_cdev_priv *priv,
struct rio_dev *rdev;
struct rio_switch *rswitch = NULL;
struct rio_mport *mport;
+ struct device *dev;
size_t size;
u32 rval;
u32 swpinfo = 0;
@@ -1693,8 +1695,10 @@ static int rio_mport_add_riodev(struct mport_cdev_priv *priv,
rmcd_debug(RDEV, "name:%s ct:0x%x did:0x%x hc:0x%x", dev_info.name,
dev_info.comptag, dev_info.destid, dev_info.hopcount);
- if (bus_find_device_by_name(&rio_bus_type, NULL, dev_info.name)) {
+ dev = bus_find_device_by_name(&rio_bus_type, NULL, dev_info.name);
+ if (dev) {
rmcd_debug(RDEV, "device %s already exists", dev_info.name);
+ put_device(dev);
return -EEXIST;
}
diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile
index 845e12ac5954..c6fdb81a068a 100644
--- a/drivers/s390/char/Makefile
+++ b/drivers/s390/char/Makefile
@@ -34,6 +34,8 @@ obj-$(CONFIG_SCLP_VT220_TTY) += sclp_vt220.o
obj-$(CONFIG_PCI) += sclp_pci.o
+obj-$(subst m,y,$(CONFIG_ZCRYPT)) += sclp_ap.o
+
obj-$(CONFIG_VMLOGRDR) += vmlogrdr.o
obj-$(CONFIG_VMCP) += vmcp.o
diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c
index 92757f9bd010..d8acabbb1ed3 100644
--- a/drivers/s390/char/con3215.c
+++ b/drivers/s390/char/con3215.c
@@ -978,7 +978,6 @@ static int tty3215_install(struct tty_driver *driver, struct tty_struct *tty)
static int tty3215_open(struct tty_struct *tty, struct file * filp)
{
struct raw3215_info *raw = tty->driver_data;
- int retval;
tty_port_tty_set(&raw->port, tty);
@@ -986,11 +985,7 @@ static int tty3215_open(struct tty_struct *tty, struct file * filp)
/*
* Start up 3215 device
*/
- retval = raw3215_startup(raw);
- if (retval)
- return retval;
-
- return 0;
+ return raw3215_startup(raw);
}
/*
diff --git a/drivers/s390/char/raw3270.h b/drivers/s390/char/raw3270.h
index 08f36e973b43..8d979e0ee605 100644
--- a/drivers/s390/char/raw3270.h
+++ b/drivers/s390/char/raw3270.h
@@ -110,7 +110,6 @@ struct raw3270_request {
};
struct raw3270_request *raw3270_request_alloc(size_t size);
-struct raw3270_request *raw3270_request_alloc_bootmem(size_t size);
void raw3270_request_free(struct raw3270_request *);
void raw3270_request_reset(struct raw3270_request *);
void raw3270_request_set_cmd(struct raw3270_request *, u8 cmd);
diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h
index 196333013e54..69d9cde9ff5a 100644
--- a/drivers/s390/char/sclp.h
+++ b/drivers/s390/char/sclp.h
@@ -229,7 +229,7 @@ static inline void sclp_fill_core_info(struct sclp_core_info *info,
#define SCLP_HAS_CPU_INFO (sclp.facilities & 0x0800000000000000ULL)
#define SCLP_HAS_CPU_RECONFIG (sclp.facilities & 0x0400000000000000ULL)
#define SCLP_HAS_PCI_RECONFIG (sclp.facilities & 0x0000000040000000ULL)
-
+#define SCLP_HAS_AP_RECONFIG (sclp.facilities & 0x0000000100000000ULL)
struct gds_subvector {
u8 length;
@@ -305,9 +305,7 @@ int sclp_deactivate(void);
int sclp_reactivate(void);
int sclp_sync_request(sclp_cmdw_t command, void *sccb);
int sclp_sync_request_timeout(sclp_cmdw_t command, void *sccb, int timeout);
-
int sclp_sdias_init(void);
-void sclp_sdias_exit(void);
enum {
sclp_init_state_uninitialized,
diff --git a/drivers/s390/char/sclp_ap.c b/drivers/s390/char/sclp_ap.c
new file mode 100644
index 000000000000..0dd1ca712795
--- /dev/null
+++ b/drivers/s390/char/sclp_ap.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * s390 crypto adapter related sclp functions.
+ *
+ * Copyright IBM Corp. 2020
+ */
+#define KMSG_COMPONENT "sclp_cmd"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <asm/sclp.h>
+#include "sclp.h"
+
+#define SCLP_CMDW_CONFIGURE_AP 0x001f0001
+#define SCLP_CMDW_DECONFIGURE_AP 0x001e0001
+
+struct ap_cfg_sccb {
+ struct sccb_header header;
+} __packed;
+
+static int do_ap_configure(sclp_cmdw_t cmd, u32 apid)
+{
+ struct ap_cfg_sccb *sccb;
+ int rc;
+
+ if (!SCLP_HAS_AP_RECONFIG)
+ return -EOPNOTSUPP;
+
+ sccb = (struct ap_cfg_sccb *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ if (!sccb)
+ return -ENOMEM;
+
+ sccb->header.length = PAGE_SIZE;
+ cmd |= (apid & 0xFF) << 8;
+ rc = sclp_sync_request(cmd, sccb);
+ if (rc)
+ goto out;
+ switch (sccb->header.response_code) {
+ case 0x0020: case 0x0120: case 0x0440: case 0x0450:
+ break;
+ default:
+ pr_warn("configure AP adapter %u failed: cmd=0x%08x response=0x%04x\n",
+ apid, cmd, sccb->header.response_code);
+ rc = -EIO;
+ break;
+ }
+out:
+ free_page((unsigned long) sccb);
+ return rc;
+}
+
+int sclp_ap_configure(u32 apid)
+{
+ return do_ap_configure(SCLP_CMDW_CONFIGURE_AP, apid);
+}
+EXPORT_SYMBOL(sclp_ap_configure);
+
+int sclp_ap_deconfigure(u32 apid)
+{
+ return do_ap_configure(SCLP_CMDW_DECONFIGURE_AP, apid);
+}
+EXPORT_SYMBOL(sclp_ap_deconfigure);
diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c
index a864b21af602..f6e97f0830f6 100644
--- a/drivers/s390/char/sclp_cmd.c
+++ b/drivers/s390/char/sclp_cmd.c
@@ -406,7 +406,7 @@ static void __init add_memory_merged(u16 rn)
if (!size)
goto skip_add;
for (addr = start; addr < start + size; addr += block_size)
- add_memory(0, addr, block_size);
+ add_memory(0, addr, block_size, MHP_NONE);
skip_add:
first_rn = rn;
num = 1;
diff --git a/drivers/s390/char/sclp_early_core.c b/drivers/s390/char/sclp_early_core.c
index 7737470f8498..a960afa974bf 100644
--- a/drivers/s390/char/sclp_early_core.c
+++ b/drivers/s390/char/sclp_early_core.c
@@ -17,12 +17,12 @@
static struct read_info_sccb __bootdata(sclp_info_sccb);
static int __bootdata(sclp_info_sccb_valid);
char *sclp_early_sccb = (char *) EARLY_SCCB_OFFSET;
-int sclp_init_state __section(.data) = sclp_init_state_uninitialized;
+int sclp_init_state = sclp_init_state_uninitialized;
/*
* Used to keep track of the size of the event masks. Qemu until version 2.11
* only supports 4 and needs a workaround.
*/
-bool sclp_mask_compat_mode __section(.data);
+bool sclp_mask_compat_mode;
void sclp_early_wait_irq(void)
{
@@ -214,11 +214,11 @@ static int sclp_early_setup(int disable, int *have_linemode, int *have_vt220)
* Output one or more lines of text on the SCLP console (VT220 and /
* or line-mode).
*/
-void __sclp_early_printk(const char *str, unsigned int len, unsigned int force)
+void __sclp_early_printk(const char *str, unsigned int len)
{
int have_linemode, have_vt220;
- if (!force && sclp_init_state != sclp_init_state_uninitialized)
+ if (sclp_init_state != sclp_init_state_uninitialized)
return;
if (sclp_early_setup(0, &have_linemode, &have_vt220) != 0)
return;
@@ -231,12 +231,7 @@ void __sclp_early_printk(const char *str, unsigned int len, unsigned int force)
void sclp_early_printk(const char *str)
{
- __sclp_early_printk(str, strlen(str), 0);
-}
-
-void sclp_early_printk_force(const char *str)
-{
- __sclp_early_printk(str, strlen(str), 1);
+ __sclp_early_printk(str, strlen(str));
}
int __init sclp_early_read_info(void)
diff --git a/drivers/s390/char/sclp_rw.c b/drivers/s390/char/sclp_rw.c
index 44594a492553..d6c84e354df5 100644
--- a/drivers/s390/char/sclp_rw.c
+++ b/drivers/s390/char/sclp_rw.c
@@ -337,24 +337,6 @@ sclp_chars_in_buffer(struct sclp_buffer *buffer)
}
/*
- * sets or provides some values that influence the drivers behaviour
- */
-void
-sclp_set_columns(struct sclp_buffer *buffer, unsigned short columns)
-{
- buffer->columns = columns;
- if (buffer->current_line != NULL &&
- buffer->current_length > buffer->columns)
- sclp_finalize_mto(buffer);
-}
-
-void
-sclp_set_htab(struct sclp_buffer *buffer, unsigned short htab)
-{
- buffer->htab = htab;
-}
-
-/*
* called by sclp_console_init and/or sclp_tty_init
*/
int
diff --git a/drivers/s390/char/sclp_rw.h b/drivers/s390/char/sclp_rw.h
index a2eb22f67393..93d706e4935c 100644
--- a/drivers/s390/char/sclp_rw.h
+++ b/drivers/s390/char/sclp_rw.h
@@ -86,8 +86,6 @@ void *sclp_unmake_buffer(struct sclp_buffer *);
int sclp_buffer_space(struct sclp_buffer *);
int sclp_write(struct sclp_buffer *buffer, const unsigned char *, int);
int sclp_emit_buffer(struct sclp_buffer *,void (*)(struct sclp_buffer *,int));
-void sclp_set_columns(struct sclp_buffer *, unsigned short);
-void sclp_set_htab(struct sclp_buffer *, unsigned short);
int sclp_chars_in_buffer(struct sclp_buffer *);
#ifdef CONFIG_SCLP_CONSOLE
diff --git a/drivers/s390/char/sclp_sdias.c b/drivers/s390/char/sclp_sdias.c
index 644b61013679..215d4b4a5ff5 100644
--- a/drivers/s390/char/sclp_sdias.c
+++ b/drivers/s390/char/sclp_sdias.c
@@ -257,7 +257,7 @@ static int __init sclp_sdias_init_async(void)
int __init sclp_sdias_init(void)
{
- if (ipl_info.type != IPL_TYPE_FCP_DUMP)
+ if (!is_ipl_type_dump())
return 0;
sclp_sdias_sccb = (void *) __get_free_page(GFP_KERNEL | GFP_DMA);
BUG_ON(!sclp_sdias_sccb);
@@ -275,9 +275,3 @@ out:
TRACE("init done\n");
return 0;
}
-
-void __exit sclp_sdias_exit(void)
-{
- debug_unregister(sdias_dbf);
- sclp_unregister(&sclp_sdias_register);
-}
diff --git a/drivers/s390/char/tape.h b/drivers/s390/char/tape.h
index 8bec5f9ea92c..e2c60475dfa8 100644
--- a/drivers/s390/char/tape.h
+++ b/drivers/s390/char/tape.h
@@ -238,7 +238,6 @@ extern int tape_do_io(struct tape_device *, struct tape_request *);
extern int tape_do_io_async(struct tape_device *, struct tape_request *);
extern int tape_do_io_interruptible(struct tape_device *, struct tape_request *);
extern int tape_cancel_io(struct tape_device *, struct tape_request *);
-void tape_hotplug_event(struct tape_device *, int major, int action);
static inline int
tape_do_io_free(struct tape_device *device, struct tape_request *request)
@@ -258,8 +257,6 @@ tape_do_io_async_free(struct tape_device *device, struct tape_request *request)
tape_do_io_async(device, request);
}
-extern int tape_oper_handler(int irq, int status);
-extern void tape_noper_handler(int irq, int status);
extern int tape_open(struct tape_device *);
extern int tape_release(struct tape_device *);
extern int tape_mtop(struct tape_device *, int, int);
diff --git a/drivers/s390/char/tape_std.h b/drivers/s390/char/tape_std.h
index 53ec8e2870d4..dcc63ff587f9 100644
--- a/drivers/s390/char/tape_std.h
+++ b/drivers/s390/char/tape_std.h
@@ -101,7 +101,6 @@ struct tape_request *tape_std_read_block(struct tape_device *, size_t);
void tape_std_read_backward(struct tape_device *device,
struct tape_request *request);
struct tape_request *tape_std_write_block(struct tape_device *, size_t);
-void tape_std_check_locate(struct tape_device *, struct tape_request *);
/* Some non-mtop commands. */
int tape_std_assign(struct tape_device *);
@@ -131,19 +130,8 @@ int tape_std_mtunload(struct tape_device *, int);
int tape_std_mtweof(struct tape_device *, int);
/* Event handlers */
-void tape_std_default_handler(struct tape_device *);
-void tape_std_unexpect_uchk_handler(struct tape_device *);
-void tape_std_irq(struct tape_device *);
void tape_std_process_eov(struct tape_device *);
-// the error recovery stuff:
-void tape_std_error_recovery(struct tape_device *);
-void tape_std_error_recovery_has_failed(struct tape_device *,int error_id);
-void tape_std_error_recovery_succeded(struct tape_device *);
-void tape_std_error_recovery_do_retry(struct tape_device *);
-void tape_std_error_recovery_read_opposite(struct tape_device *);
-void tape_std_error_recovery_HWBUG(struct tape_device *, int condno);
-
/* S390 tape types */
enum s390_tape_type {
tape_3480,
diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c
index d29f1b71618e..1515fdc3c1ab 100644
--- a/drivers/s390/char/zcore.c
+++ b/drivers/s390/char/zcore.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-1.0+
/*
* zcore module to export memory content and register sets for creating system
- * dumps on SCSI disks (zfcpdump).
+ * dumps on SCSI/NVMe disks (zfcp/nvme dump).
*
* For more information please refer to Documentation/s390/zfcpdump.rst
*
@@ -243,7 +243,7 @@ static int __init zcore_init(void)
unsigned char arch;
int rc;
- if (ipl_info.type != IPL_TYPE_FCP_DUMP)
+ if (!is_ipl_type_dump())
return -ENODATA;
if (OLDMEM_BASE)
return -ENODATA;
@@ -252,9 +252,16 @@ static int __init zcore_init(void)
debug_register_view(zcore_dbf, &debug_sprintf_view);
debug_set_level(zcore_dbf, 6);
- TRACE("devno: %x\n", ipl_info.data.fcp.dev_id.devno);
- TRACE("wwpn: %llx\n", (unsigned long long) ipl_info.data.fcp.wwpn);
- TRACE("lun: %llx\n", (unsigned long long) ipl_info.data.fcp.lun);
+ if (ipl_info.type == IPL_TYPE_FCP_DUMP) {
+ TRACE("type: fcp\n");
+ TRACE("devno: %x\n", ipl_info.data.fcp.dev_id.devno);
+ TRACE("wwpn: %llx\n", (unsigned long long) ipl_info.data.fcp.wwpn);
+ TRACE("lun: %llx\n", (unsigned long long) ipl_info.data.fcp.lun);
+ } else if (ipl_info.type == IPL_TYPE_NVME_DUMP) {
+ TRACE("type: nvme\n");
+ TRACE("fid: %x\n", ipl_info.data.nvme.fid);
+ TRACE("nsid: %x\n", ipl_info.data.nvme.nsid);
+ }
rc = sclp_sdias_init();
if (rc)
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index 38017c4a31e9..fc06a4002168 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -1265,6 +1265,27 @@ int chsc_sstpi(void *page, void *result, size_t size)
return (rr->response.code == 0x0001) ? 0 : -EIO;
}
+int chsc_stzi(void *page, void *result, size_t size)
+{
+ struct {
+ struct chsc_header request;
+ unsigned int rsvd0[3];
+ struct chsc_header response;
+ char data[];
+ } *rr;
+ int rc;
+
+ memset(page, 0, PAGE_SIZE);
+ rr = page;
+ rr->request.length = 0x0010;
+ rr->request.code = 0x003e;
+ rc = chsc(rr);
+ if (rc)
+ return -EIO;
+ memcpy(result, &rr->data, size);
+ return (rr->response.code == 0x0001) ? 0 : -EIO;
+}
+
int chsc_siosl(struct subchannel_id schid)
{
struct {
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index 1981eb62d329..cca1a7c4bb33 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -1355,20 +1355,6 @@ static int __init channel_subsystem_init_sync(void)
}
subsys_initcall_sync(channel_subsystem_init_sync);
-void channel_subsystem_reinit(void)
-{
- struct channel_path *chp;
- struct chp_id chpid;
-
- chsc_enable_facility(CHSC_SDA_OC_MSS);
- chp_id_for_each(&chpid) {
- chp = chpid_to_chp(chpid);
- if (chp)
- chp_update_desc(chp);
- }
- cmf_reactivate();
-}
-
#ifdef CONFIG_PROC_FS
static ssize_t cio_settle_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
diff --git a/drivers/s390/cio/device.h b/drivers/s390/cio/device.h
index f5c427ec24b1..853b6a8ca095 100644
--- a/drivers/s390/cio/device.h
+++ b/drivers/s390/cio/device.h
@@ -96,7 +96,6 @@ int ccw_device_online(struct ccw_device *);
int ccw_device_offline(struct ccw_device *);
void ccw_device_update_sense_data(struct ccw_device *);
int ccw_device_test_sense_data(struct ccw_device *);
-void ccw_device_schedule_sch_unregister(struct ccw_device *);
int ccw_purge_blacklisted(void);
void ccw_device_sched_todo(struct ccw_device *cdev, enum cdev_todo todo);
struct ccw_device *get_ccwdev_by_dev_id(struct ccw_dev_id *dev_id);
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index 4fab8bba2cdd..f9a31c7819ae 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -531,26 +531,6 @@ static inline int qdio_inbound_q_done(struct qdio_q *q, unsigned int start)
return 1;
}
-static inline void qdio_handle_aobs(struct qdio_q *q, int start, int count)
-{
- unsigned char state = 0;
- int j, b = start;
-
- for (j = 0; j < count; ++j) {
- get_buf_state(q, b, &state, 0);
- if (state == SLSB_P_OUTPUT_PENDING) {
- struct qaob *aob = q->u.out.aobs[b];
- if (aob == NULL)
- continue;
-
- q->u.out.sbal_state[b].flags |=
- QDIO_OUTBUF_STATE_FLAG_PENDING;
- q->u.out.aobs[b] = NULL;
- }
- b = next_buf(b);
- }
-}
-
static inline unsigned long qdio_aob_for_buffer(struct qdio_output_q *q,
int bufnr)
{
@@ -640,6 +620,19 @@ void qdio_inbound_processing(unsigned long data)
__qdio_inbound_processing(q);
}
+static void qdio_check_pending(struct qdio_q *q, unsigned int index)
+{
+ unsigned char state;
+
+ if (get_buf_state(q, index, &state, 0) > 0 &&
+ state == SLSB_P_OUTPUT_PENDING &&
+ q->u.out.aobs[index]) {
+ q->u.out.sbal_state[index].flags |=
+ QDIO_OUTBUF_STATE_FLAG_PENDING;
+ q->u.out.aobs[index] = NULL;
+ }
+}
+
static int get_outbound_buffer_frontier(struct qdio_q *q, unsigned int start)
{
unsigned char state = 0;
@@ -712,8 +705,13 @@ static inline int qdio_outbound_q_moved(struct qdio_q *q, unsigned int start)
if (count) {
DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "out moved:%1d", q->nr);
- if (q->u.out.use_cq)
- qdio_handle_aobs(q, start, count);
+
+ if (q->u.out.use_cq) {
+ unsigned int i;
+
+ for (i = 0; i < count; i++)
+ qdio_check_pending(q, QDIO_BUFNR(start + i));
+ }
}
return count;
@@ -1221,7 +1219,6 @@ static void qdio_trace_init_data(struct qdio_irq *irq,
struct qdio_initialize *data)
{
DBF_DEV_EVENT(DBF_ERR, irq, "qfmt:%1u", data->q_format);
- DBF_DEV_HEX(irq, data->adapter_name, 8, DBF_ERR);
DBF_DEV_EVENT(DBF_ERR, irq, "qpff%4x", data->qib_param_field_format);
DBF_DEV_HEX(irq, &data->qib_param_field, sizeof(void *), DBF_ERR);
DBF_DEV_HEX(irq, &data->input_slib_elements, sizeof(void *), DBF_ERR);
diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c
index 2c5cc6ec668e..a5b2e16b7aa8 100644
--- a/drivers/s390/cio/qdio_setup.c
+++ b/drivers/s390/cio/qdio_setup.c
@@ -9,6 +9,8 @@
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/io.h>
+
+#include <asm/ebcdic.h>
#include <asm/qdio.h>
#include "cio.h"
@@ -403,28 +405,22 @@ void qdio_free_async_data(struct qdio_irq *irq_ptr)
}
}
-static void __qdio_allocate_fill_qdr(struct qdio_irq *irq_ptr,
- struct qdio_q **irq_ptr_qs,
- int i, int nr)
+static void qdio_fill_qdr_desc(struct qdesfmt0 *desc, struct qdio_q *queue)
{
- irq_ptr->qdr->qdf0[i + nr].sliba =
- (unsigned long)irq_ptr_qs[i]->slib;
-
- irq_ptr->qdr->qdf0[i + nr].sla =
- (unsigned long)irq_ptr_qs[i]->sl;
-
- irq_ptr->qdr->qdf0[i + nr].slsba =
- (unsigned long)&irq_ptr_qs[i]->slsb.val[0];
-
- irq_ptr->qdr->qdf0[i + nr].akey = PAGE_DEFAULT_KEY >> 4;
- irq_ptr->qdr->qdf0[i + nr].bkey = PAGE_DEFAULT_KEY >> 4;
- irq_ptr->qdr->qdf0[i + nr].ckey = PAGE_DEFAULT_KEY >> 4;
- irq_ptr->qdr->qdf0[i + nr].dkey = PAGE_DEFAULT_KEY >> 4;
+ desc->sliba = virt_to_phys(queue->slib);
+ desc->sla = virt_to_phys(queue->sl);
+ desc->slsba = virt_to_phys(&queue->slsb);
+
+ desc->akey = PAGE_DEFAULT_KEY >> 4;
+ desc->bkey = PAGE_DEFAULT_KEY >> 4;
+ desc->ckey = PAGE_DEFAULT_KEY >> 4;
+ desc->dkey = PAGE_DEFAULT_KEY >> 4;
}
static void setup_qdr(struct qdio_irq *irq_ptr,
struct qdio_initialize *qdio_init)
{
+ struct qdesfmt0 *desc = &irq_ptr->qdr->qdf0[0];
int i;
irq_ptr->qdr->qfmt = qdio_init->q_format;
@@ -433,15 +429,14 @@ static void setup_qdr(struct qdio_irq *irq_ptr,
irq_ptr->qdr->oqdcnt = qdio_init->no_output_qs;
irq_ptr->qdr->iqdsz = sizeof(struct qdesfmt0) / 4; /* size in words */
irq_ptr->qdr->oqdsz = sizeof(struct qdesfmt0) / 4;
- irq_ptr->qdr->qiba = (unsigned long)&irq_ptr->qib;
+ irq_ptr->qdr->qiba = virt_to_phys(&irq_ptr->qib);
irq_ptr->qdr->qkey = PAGE_DEFAULT_KEY >> 4;
for (i = 0; i < qdio_init->no_input_qs; i++)
- __qdio_allocate_fill_qdr(irq_ptr, irq_ptr->input_qs, i, 0);
+ qdio_fill_qdr_desc(desc++, irq_ptr->input_qs[i]);
for (i = 0; i < qdio_init->no_output_qs; i++)
- __qdio_allocate_fill_qdr(irq_ptr, irq_ptr->output_qs, i,
- qdio_init->no_input_qs);
+ qdio_fill_qdr_desc(desc++, irq_ptr->output_qs[i]);
}
static void setup_qib(struct qdio_irq *irq_ptr,
@@ -459,7 +454,8 @@ static void setup_qib(struct qdio_irq *irq_ptr,
if (init_data->no_output_qs)
irq_ptr->qib.osliba =
(unsigned long)(irq_ptr->output_qs[0]->slib);
- memcpy(irq_ptr->qib.ebcnam, init_data->adapter_name, 8);
+ memcpy(irq_ptr->qib.ebcnam, dev_name(&irq_ptr->cdev->dev), 8);
+ ASCEBC(irq_ptr->qib.ebcnam, 8);
}
int qdio_setup_irq(struct qdio_irq *irq_ptr, struct qdio_initialize *init_data)
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 24a1940b829e..485cbfcbf06e 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -214,7 +214,7 @@ static inline int ap_fetch_qci_info(struct ap_config_info *info)
static void __init ap_init_qci_info(void)
{
if (!ap_qci_available()) {
- AP_DBF(DBF_INFO, "%s QCI not supported\n", __func__);
+ AP_DBF_INFO("%s QCI not supported\n", __func__);
return;
}
@@ -226,18 +226,18 @@ static void __init ap_init_qci_info(void)
ap_qci_info = NULL;
return;
}
- AP_DBF(DBF_INFO, "%s successful fetched initial qci info\n", __func__);
+ AP_DBF_INFO("%s successful fetched initial qci info\n", __func__);
if (ap_qci_info->apxa) {
if (ap_qci_info->Na) {
ap_max_adapter_id = ap_qci_info->Na;
- AP_DBF(DBF_INFO, "%s new ap_max_adapter_id is %d\n",
- __func__, ap_max_adapter_id);
+ AP_DBF_INFO("%s new ap_max_adapter_id is %d\n",
+ __func__, ap_max_adapter_id);
}
if (ap_qci_info->Nd) {
ap_max_domain_id = ap_qci_info->Nd;
- AP_DBF(DBF_INFO, "%s new ap_max_domain_id is %d\n",
- __func__, ap_max_domain_id);
+ AP_DBF_INFO("%s new ap_max_domain_id is %d\n",
+ __func__, ap_max_domain_id);
}
}
}
@@ -307,7 +307,7 @@ EXPORT_SYMBOL(ap_test_config_ctrl_domain);
* false otherwise.
*/
static bool ap_queue_info(ap_qid_t qid, int *q_type,
- unsigned int *q_fac, int *q_depth)
+ unsigned int *q_fac, int *q_depth, bool *q_decfg)
{
struct ap_queue_status status;
unsigned long info = 0;
@@ -322,6 +322,9 @@ static bool ap_queue_info(ap_qid_t qid, int *q_type,
switch (status.response_code) {
case AP_RESPONSE_NORMAL:
case AP_RESPONSE_RESET_IN_PROGRESS:
+ case AP_RESPONSE_DECONFIGURED:
+ case AP_RESPONSE_CHECKSTOPPED:
+ case AP_RESPONSE_BUSY:
/*
* According to the architecture in all these cases the
* info should be filled. All bits 0 is not possible as
@@ -332,6 +335,7 @@ static bool ap_queue_info(ap_qid_t qid, int *q_type,
*q_type = (int)((info >> 24) & 0xff);
*q_fac = (unsigned int)(info >> 32);
*q_depth = (int)(info & 0xff);
+ *q_decfg = status.response_code == AP_RESPONSE_DECONFIGURED;
switch (*q_type) {
/* For CEX2 and CEX3 the available functions
* are not reflected by the facilities bits.
@@ -618,8 +622,8 @@ static int __ap_revise_reserved(struct device *dev, void *dummy)
drvres = to_ap_drv(dev->driver)->flags
& AP_DRIVER_FLAG_DEFAULT;
if (!!devres != !!drvres) {
- AP_DBF(DBF_DEBUG, "reprobing queue=%02x.%04x\n",
- card, queue);
+ AP_DBF_DBG("reprobing queue=%02x.%04x\n",
+ card, queue);
rc = device_reprobe(dev);
}
}
@@ -796,7 +800,7 @@ EXPORT_SYMBOL(ap_bus_force_rescan);
*/
void ap_bus_cfg_chg(void)
{
- AP_DBF(DBF_INFO, "%s config change, forcing bus rescan\n", __func__);
+ AP_DBF_DBG("%s config change, forcing bus rescan\n", __func__);
ap_bus_force_rescan();
}
@@ -947,7 +951,7 @@ static ssize_t ap_domain_store(struct bus_type *bus,
ap_domain_index = domain;
spin_unlock_bh(&ap_domain_lock);
- AP_DBF(DBF_INFO, "stored new default domain=%d\n", domain);
+ AP_DBF_INFO("stored new default domain=%d\n", domain);
return count;
}
@@ -1208,8 +1212,8 @@ static void ap_select_domain(void)
}
if (dom <= ap_max_domain_id) {
ap_domain_index = dom;
- AP_DBF(DBF_DEBUG, "%s new default domain is %d\n",
- __func__, ap_domain_index);
+ AP_DBF_INFO("%s new default domain is %d\n",
+ __func__, ap_domain_index);
}
out:
spin_unlock_bh(&ap_domain_lock);
@@ -1225,8 +1229,11 @@ static int ap_get_compatible_type(ap_qid_t qid, int rawtype, unsigned int func)
int comp_type = 0;
/* < CEX2A is not supported */
- if (rawtype < AP_DEVICE_TYPE_CEX2A)
+ if (rawtype < AP_DEVICE_TYPE_CEX2A) {
+ AP_DBF_WARN("get_comp_type queue=%02x.%04x unsupported type %d\n",
+ AP_QID_CARD(qid), AP_QID_QUEUE(qid), rawtype);
return 0;
+ }
/* up to CEX7 known and fully supported */
if (rawtype <= AP_DEVICE_TYPE_CEX7)
return rawtype;
@@ -1248,11 +1255,12 @@ static int ap_get_compatible_type(ap_qid_t qid, int rawtype, unsigned int func)
comp_type = apinfo.cat;
}
if (!comp_type)
- AP_DBF(DBF_WARN, "queue=%02x.%04x unable to map type %d\n",
- AP_QID_CARD(qid), AP_QID_QUEUE(qid), rawtype);
+ AP_DBF_WARN("get_comp_type queue=%02x.%04x unable to map type %d\n",
+ AP_QID_CARD(qid), AP_QID_QUEUE(qid), rawtype);
else if (comp_type != rawtype)
- AP_DBF(DBF_INFO, "queue=%02x.%04x map type %d to %d\n",
- AP_QID_CARD(qid), AP_QID_QUEUE(qid), rawtype, comp_type);
+ AP_DBF_INFO("get_comp_type queue=%02x.%04x map type %d to %d\n",
+ AP_QID_CARD(qid), AP_QID_QUEUE(qid),
+ rawtype, comp_type);
return comp_type;
}
@@ -1286,155 +1294,278 @@ static int __match_queue_device_with_queue_id(struct device *dev, const void *da
/*
* Helper function for ap_scan_bus().
- * Does the scan bus job for the given adapter id.
+ * Remove card device and associated queue devices.
*/
-static void _ap_scan_bus_adapter(int id)
+static inline void ap_scan_rm_card_dev_and_queue_devs(struct ap_card *ac)
{
- bool broken;
+ bus_for_each_dev(&ap_bus_type, NULL,
+ (void *)(long) ac->id,
+ __ap_queue_devices_with_id_unregister);
+ device_unregister(&ac->ap_dev.device);
+}
+
+/*
+ * Helper function for ap_scan_bus().
+ * Does the scan bus job for all the domains within
+ * a valid adapter given by an ap_card ptr.
+ */
+static inline void ap_scan_domains(struct ap_card *ac)
+{
+ bool decfg;
ap_qid_t qid;
unsigned int func;
- struct ap_card *ac;
struct device *dev;
struct ap_queue *aq;
+ int rc, dom, depth, type;
+
+ /*
+ * Go through the configuration for the domains and compare them
+ * to the existing queue devices. Also take care of the config
+ * and error state for the queue devices.
+ */
+
+ for (dom = 0; dom <= ap_max_domain_id; dom++) {
+ qid = AP_MKQID(ac->id, dom);
+ dev = bus_find_device(&ap_bus_type, NULL,
+ (void *)(long) qid,
+ __match_queue_device_with_qid);
+ aq = dev ? to_ap_queue(dev) : NULL;
+ if (!ap_test_config_usage_domain(dom)) {
+ if (dev) {
+ AP_DBF_INFO("%s(%d,%d) not in config any more, rm queue device\n",
+ __func__, ac->id, dom);
+ device_unregister(dev);
+ put_device(dev);
+ }
+ continue;
+ }
+ /* domain is valid, get info from this APQN */
+ if (!ap_queue_info(qid, &type, &func, &depth, &decfg)) {
+ if (aq) {
+ AP_DBF_INFO(
+ "%s(%d,%d) ap_queue_info() not successful, rm queue device\n",
+ __func__, ac->id, dom);
+ device_unregister(dev);
+ put_device(dev);
+ }
+ continue;
+ }
+ /* if no queue device exists, create a new one */
+ if (!aq) {
+ aq = ap_queue_create(qid, ac->ap_dev.device_type);
+ if (!aq) {
+ AP_DBF_WARN("%s(%d,%d) ap_queue_create() failed\n",
+ __func__, ac->id, dom);
+ continue;
+ }
+ aq->card = ac;
+ aq->config = !decfg;
+ dev = &aq->ap_dev.device;
+ dev->bus = &ap_bus_type;
+ dev->parent = &ac->ap_dev.device;
+ dev_set_name(dev, "%02x.%04x", ac->id, dom);
+ /* register queue device */
+ rc = device_register(dev);
+ if (rc) {
+ AP_DBF_WARN("%s(%d,%d) device_register() failed\n",
+ __func__, ac->id, dom);
+ goto put_dev_and_continue;
+ }
+ if (decfg)
+ AP_DBF_INFO("%s(%d,%d) new (decfg) queue device created\n",
+ __func__, ac->id, dom);
+ else
+ AP_DBF_INFO("%s(%d,%d) new queue device created\n",
+ __func__, ac->id, dom);
+ goto put_dev_and_continue;
+ }
+ /* Check config state on the already existing queue device */
+ spin_lock_bh(&aq->lock);
+ if (decfg && aq->config) {
+ /* config off this queue device */
+ aq->config = false;
+ if (aq->dev_state > AP_DEV_STATE_UNINITIATED) {
+ aq->dev_state = AP_DEV_STATE_ERROR;
+ aq->last_err_rc = AP_RESPONSE_DECONFIGURED;
+ }
+ spin_unlock_bh(&aq->lock);
+ AP_DBF_INFO("%s(%d,%d) queue device config off\n",
+ __func__, ac->id, dom);
+ /* 'receive' pending messages with -EAGAIN */
+ ap_flush_queue(aq);
+ goto put_dev_and_continue;
+ }
+ if (!decfg && !aq->config) {
+ /* config on this queue device */
+ aq->config = true;
+ if (aq->dev_state > AP_DEV_STATE_UNINITIATED) {
+ aq->dev_state = AP_DEV_STATE_OPERATING;
+ aq->sm_state = AP_SM_STATE_RESET_START;
+ }
+ spin_unlock_bh(&aq->lock);
+ AP_DBF_INFO("%s(%d,%d) queue device config on\n",
+ __func__, ac->id, dom);
+ goto put_dev_and_continue;
+ }
+ /* handle other error states */
+ if (!decfg && aq->dev_state == AP_DEV_STATE_ERROR) {
+ spin_unlock_bh(&aq->lock);
+ /* 'receive' pending messages with -EAGAIN */
+ ap_flush_queue(aq);
+ /* re-init (with reset) the queue device */
+ ap_queue_init_state(aq);
+ AP_DBF_INFO("%s(%d,%d) queue device reinit enforced\n",
+ __func__, ac->id, dom);
+ goto put_dev_and_continue;
+ }
+ spin_unlock_bh(&aq->lock);
+put_dev_and_continue:
+ put_device(dev);
+ }
+}
+
+/*
+ * Helper function for ap_scan_bus().
+ * Does the scan bus job for the given adapter id.
+ */
+static inline void ap_scan_adapter(int ap)
+{
+ bool decfg;
+ ap_qid_t qid;
+ unsigned int func;
+ struct device *dev;
+ struct ap_card *ac;
int rc, dom, depth, type, comp_type;
- /* check if there is a card device registered with this id */
+ /* Is there currently a card device for this adapter ? */
dev = bus_find_device(&ap_bus_type, NULL,
- (void *)(long) id,
+ (void *)(long) ap,
__match_card_device_with_id);
ac = dev ? to_ap_card(dev) : NULL;
- if (!ap_test_config_card_id(id)) {
- if (dev) {
- /* Card device has been removed from configuration */
- bus_for_each_dev(&ap_bus_type, NULL,
- (void *)(long) id,
- __ap_queue_devices_with_id_unregister);
- device_unregister(dev);
+
+ /* Adapter not in configuration ? */
+ if (!ap_test_config_card_id(ap)) {
+ if (ac) {
+ AP_DBF_INFO("%s(%d) ap not in config any more, rm card and queue devices\n",
+ __func__, ap);
+ ap_scan_rm_card_dev_and_queue_devs(ac);
put_device(dev);
}
return;
}
/*
- * This card id is enabled in the configuration. If we already have
- * a card device with this id, check if type and functions are still
- * the very same. Also verify that at least one queue is available.
+ * Adapter ap is valid in the current configuration. So do some checks:
+ * If no card device exists, build one. If a card device exists, check
+ * for type and functions changed. For all this we need to find a valid
+ * APQN first.
*/
- if (ac) {
- /* find the first valid queue */
- for (dom = 0; dom < AP_DOMAINS; dom++) {
- qid = AP_MKQID(id, dom);
- if (ap_queue_info(qid, &type, &func, &depth))
+
+ for (dom = 0; dom <= ap_max_domain_id; dom++)
+ if (ap_test_config_usage_domain(dom)) {
+ qid = AP_MKQID(ap, dom);
+ if (ap_queue_info(qid, &type, &func, &depth, &decfg))
break;
}
- broken = false;
- if (dom >= AP_DOMAINS) {
- /* no accessible queue on this card */
- broken = true;
- } else if (ac->raw_hwtype != type) {
- /* card type has changed */
- AP_DBF(DBF_INFO, "card=%02x type changed.\n", id);
- broken = true;
- } else if (ac->functions != func) {
- /* card functions have changed */
- AP_DBF(DBF_INFO, "card=%02x functions changed.\n", id);
- broken = true;
+ if (dom > ap_max_domain_id) {
+ /* Could not find a valid APQN for this adapter */
+ if (ac) {
+ AP_DBF_INFO(
+ "%s(%d) no type info (no APQN found), rm card and queue devices\n",
+ __func__, ap);
+ ap_scan_rm_card_dev_and_queue_devs(ac);
+ put_device(dev);
+ } else {
+ AP_DBF_DBG("%s(%d) no type info (no APQN found), ignored\n",
+ __func__, ap);
}
- if (broken) {
- /* unregister card device and associated queues */
- bus_for_each_dev(&ap_bus_type, NULL,
- (void *)(long) id,
- __ap_queue_devices_with_id_unregister);
- device_unregister(dev);
+ return;
+ }
+ if (!type) {
+ /* No apdater type info available, an unusable adapter */
+ if (ac) {
+ AP_DBF_INFO("%s(%d) no valid type (0) info, rm card and queue devices\n",
+ __func__, ap);
+ ap_scan_rm_card_dev_and_queue_devs(ac);
put_device(dev);
- /* go back if there is no valid queue on this card */
- if (dom >= AP_DOMAINS)
- return;
- ac = NULL;
+ } else {
+ AP_DBF_DBG("%s(%d) no valid type (0) info, ignored\n",
+ __func__, ap);
}
+ return;
}
- /*
- * Go through all possible queue ids. Check and maybe create or release
- * queue devices for this card. If there exists no card device yet,
- * create a card device also.
- */
- for (dom = 0; dom < AP_DOMAINS; dom++) {
- qid = AP_MKQID(id, dom);
- dev = bus_find_device(&ap_bus_type, NULL,
- (void *)(long) qid,
- __match_queue_device_with_qid);
- aq = dev ? to_ap_queue(dev) : NULL;
- if (!ap_test_config_usage_domain(dom)) {
- if (dev) {
- /* Queue device exists but has been
- * removed from configuration.
- */
- device_unregister(dev);
- put_device(dev);
- }
- continue;
- }
- /* try to fetch infos about this queue */
- broken = !ap_queue_info(qid, &type, &func, &depth);
- if (dev) {
- if (!broken) {
- spin_lock_bh(&aq->lock);
- broken = aq->sm_state == AP_SM_STATE_BORKED;
- spin_unlock_bh(&aq->lock);
+ if (ac) {
+ /* Check APQN against existing card device for changes */
+ if (ac->raw_hwtype != type) {
+ AP_DBF_INFO("%s(%d) hwtype %d changed, rm card and queue devices\n",
+ __func__, ap, type);
+ ap_scan_rm_card_dev_and_queue_devs(ac);
+ put_device(dev);
+ ac = NULL;
+ } else if (ac->functions != func) {
+ AP_DBF_INFO("%s(%d) functions 0x%08x changed, rm card and queue devices\n",
+ __func__, ap, type);
+ ap_scan_rm_card_dev_and_queue_devs(ac);
+ put_device(dev);
+ ac = NULL;
+ } else {
+ if (decfg && ac->config) {
+ ac->config = false;
+ AP_DBF_INFO("%s(%d) card device config off\n",
+ __func__, ap);
+
}
- if (broken) {
- /* Remove broken device */
- AP_DBF(DBF_DEBUG,
- "removing broken queue=%02x.%04x\n",
- id, dom);
- device_unregister(dev);
+ if (!decfg && !ac->config) {
+ ac->config = true;
+ AP_DBF_INFO("%s(%d) card device config on\n",
+ __func__, ap);
}
- put_device(dev);
- continue;
}
- if (broken)
- continue;
- /* a new queue device is needed, check out comp type */
+ }
+
+ if (!ac) {
+ /* Build a new card device */
comp_type = ap_get_compatible_type(qid, type, func);
- if (!comp_type)
- continue;
- /* maybe a card device needs to be created first */
+ if (!comp_type) {
+ AP_DBF_WARN("%s(%d) type %d, can't get compatibility type\n",
+ __func__, ap, type);
+ return;
+ }
+ ac = ap_card_create(ap, depth, type, comp_type, func);
if (!ac) {
- ac = ap_card_create(id, depth, type, comp_type, func);
- if (!ac)
- continue;
- ac->ap_dev.device.bus = &ap_bus_type;
- ac->ap_dev.device.parent = ap_root_device;
- dev_set_name(&ac->ap_dev.device, "card%02x", id);
- /* Register card device with AP bus */
- rc = device_register(&ac->ap_dev.device);
- if (rc) {
- put_device(&ac->ap_dev.device);
- ac = NULL;
- break;
- }
- /* get it and thus adjust reference counter */
- get_device(&ac->ap_dev.device);
+ AP_DBF_WARN("%s(%d) ap_card_create() failed\n",
+ __func__, ap);
+ return;
}
- /* now create the new queue device */
- aq = ap_queue_create(qid, comp_type);
- if (!aq)
- continue;
- aq->card = ac;
- aq->ap_dev.device.bus = &ap_bus_type;
- aq->ap_dev.device.parent = &ac->ap_dev.device;
- dev_set_name(&aq->ap_dev.device, "%02x.%04x", id, dom);
- /* Register queue device */
- rc = device_register(&aq->ap_dev.device);
+ ac->config = !decfg;
+ dev = &ac->ap_dev.device;
+ dev->bus = &ap_bus_type;
+ dev->parent = ap_root_device;
+ dev_set_name(dev, "card%02x", ap);
+ /* Register the new card device with AP bus */
+ rc = device_register(dev);
if (rc) {
- put_device(&aq->ap_dev.device);
- continue;
+ AP_DBF_WARN("%s(%d) device_register() failed\n",
+ __func__, ap);
+ put_device(dev);
+ return;
}
- } /* end domain loop */
+ /* get it and thus adjust reference counter */
+ get_device(dev);
+ if (decfg)
+ AP_DBF_INFO("%s(%d) new (decfg) card device type=%d func=0x%08x created\n",
+ __func__, ap, type, func);
+ else
+ AP_DBF_INFO("%s(%d) new card device type=%d func=0x%08x created\n",
+ __func__, ap, type, func);
+ }
+
+ /* Verify the domains and the queue devices for this card */
+ ap_scan_domains(ac);
- if (ac)
- put_device(&ac->ap_dev.device);
+ /* release the card device */
+ put_device(&ac->ap_dev.device);
}
/**
@@ -1443,16 +1574,16 @@ static void _ap_scan_bus_adapter(int id)
*/
static void ap_scan_bus(struct work_struct *unused)
{
- int id;
+ int ap;
ap_fetch_qci_info(ap_qci_info);
ap_select_domain();
- AP_DBF(DBF_DEBUG, "%s running\n", __func__);
+ AP_DBF_DBG("%s running\n", __func__);
/* loop over all possible adapters */
- for (id = 0; id < AP_DEVICES; id++)
- _ap_scan_bus_adapter(id);
+ for (ap = 0; ap <= ap_max_adapter_id; ap++)
+ ap_scan_adapter(ap);
/* check if there is at least one queue available with default domain */
if (ap_domain_index >= 0) {
@@ -1463,9 +1594,8 @@ static void ap_scan_bus(struct work_struct *unused)
if (dev)
put_device(dev);
else
- AP_DBF(DBF_INFO,
- "no queue device with default domain %d available\n",
- ap_domain_index);
+ AP_DBF_INFO("no queue device with default domain %d available\n",
+ ap_domain_index);
}
mod_timer(&ap_config_timer, jiffies + ap_config_time * HZ);
@@ -1575,7 +1705,6 @@ static int __init ap_module_init(void)
*/
if (MACHINE_IS_VM)
poll_timeout = 1500000;
- spin_lock_init(&ap_poll_timer_lock);
hrtimer_init(&ap_poll_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
ap_poll_timer.function = ap_poll_timeout;
diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
index 1ea046324e8f..5029b80132aa 100644
--- a/drivers/s390/crypto/ap_bus.h
+++ b/drivers/s390/crypto/ap_bus.h
@@ -50,6 +50,7 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr)
#define AP_RESPONSE_NO_FIRST_PART 0x13
#define AP_RESPONSE_MESSAGE_TOO_BIG 0x15
#define AP_RESPONSE_REQ_FAC_NOT_INST 0x16
+#define AP_RESPONSE_INVALID_DOMAIN 0x42
/*
* Known device types
@@ -86,15 +87,12 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr)
* AP queue state machine states
*/
enum ap_sm_state {
- AP_SM_STATE_RESET_START,
+ AP_SM_STATE_RESET_START = 0,
AP_SM_STATE_RESET_WAIT,
AP_SM_STATE_SETIRQ_WAIT,
AP_SM_STATE_IDLE,
AP_SM_STATE_WORKING,
AP_SM_STATE_QUEUE_FULL,
- AP_SM_STATE_REMOVE, /* about to be removed from driver */
- AP_SM_STATE_UNBOUND, /* momentary not bound to a driver */
- AP_SM_STATE_BORKED, /* broken */
NR_AP_SM_STATES
};
@@ -118,6 +116,17 @@ enum ap_sm_wait {
NR_AP_SM_WAIT
};
+/*
+ * AP queue device states
+ */
+enum ap_dev_state {
+ AP_DEV_STATE_UNINITIATED = 0, /* fresh and virgin, not touched */
+ AP_DEV_STATE_OPERATING, /* queue dev is working normal */
+ AP_DEV_STATE_SHUTDOWN, /* remove/unbind/shutdown in progress */
+ AP_DEV_STATE_ERROR, /* device is in error state */
+ NR_AP_DEV_STATES
+};
+
struct ap_device;
struct ap_message;
@@ -158,6 +167,7 @@ struct ap_card {
unsigned int functions; /* AP device function bitfield. */
int queue_depth; /* AP queue depth.*/
int id; /* AP card number. */
+ bool config; /* configured state */
atomic64_t total_request_count; /* # requests ever for this AP device.*/
};
@@ -169,10 +179,11 @@ struct ap_queue {
struct ap_card *card; /* Ptr to assoc. AP card. */
spinlock_t lock; /* Per device lock. */
void *private; /* ap driver private pointer. */
+ enum ap_dev_state dev_state; /* queue device state */
+ bool config; /* configured state */
ap_qid_t qid; /* AP queue id. */
int interrupt; /* indicate if interrupts are enabled */
int queue_count; /* # messages currently on AP queue. */
- enum ap_sm_state sm_state; /* ap queue state machine state */
int pendingq_count; /* # requests on pendingq list. */
int requestq_count; /* # requests on requestq list. */
u64 total_request_count; /* # requests ever for this AP device.*/
@@ -181,18 +192,45 @@ struct ap_queue {
struct list_head pendingq; /* List of message sent to AP queue. */
struct list_head requestq; /* List of message yet to be sent. */
struct ap_message *reply; /* Per device reply message. */
+ enum ap_sm_state sm_state; /* ap queue state machine state */
+ int last_err_rc; /* last error state response code */
};
#define to_ap_queue(x) container_of((x), struct ap_queue, ap_dev.device)
typedef enum ap_sm_wait (ap_func_t)(struct ap_queue *queue);
+/* failure injection cmd struct */
+struct ap_fi {
+ union {
+ u16 cmd; /* fi flags + action */
+ struct {
+ u8 flags; /* fi flags only */
+ u8 action; /* fi action only */
+ };
+ };
+};
+
+/* all currently known fi actions */
+enum ap_fi_actions {
+ AP_FI_ACTION_CCA_AGENT_FF = 0x01,
+ AP_FI_ACTION_CCA_DOM_INVAL = 0x02,
+ AP_FI_ACTION_NQAP_QID_INVAL = 0x03,
+};
+
+/* all currently known fi flags */
+enum ap_fi_flags {
+ AP_FI_FLAG_NO_RETRY = 0x01,
+ AP_FI_FLAG_TOGGLE_SPECIAL = 0x02,
+};
+
struct ap_message {
struct list_head list; /* Request queueing. */
unsigned long long psmid; /* Message id. */
void *msg; /* Pointer to message buffer. */
unsigned int len; /* Message length. */
- u32 flags; /* Flags, see AP_MSG_FLAG_xxx */
+ u16 flags; /* Flags, see AP_MSG_FLAG_xxx */
+ struct ap_fi fi; /* Failure Injection cmd */
int rc; /* Return code for this message */
void *private; /* ap driver private pointer. */
/* receive is called from tasklet context */
@@ -200,7 +238,7 @@ struct ap_message {
struct ap_message *);
};
-#define AP_MSG_FLAG_SPECIAL (1 << 16) /* flag msg as 'special' with NQAP */
+#define AP_MSG_FLAG_SPECIAL 1 /* flag msg as 'special' with NQAP */
/**
* ap_init_message() - Initialize ap_message.
@@ -234,7 +272,7 @@ int ap_recv(ap_qid_t, unsigned long long *, void *, size_t);
enum ap_sm_wait ap_sm_event(struct ap_queue *aq, enum ap_sm_event event);
enum ap_sm_wait ap_sm_event_loop(struct ap_queue *aq, enum ap_sm_event event);
-void ap_queue_message(struct ap_queue *aq, struct ap_message *ap_msg);
+int ap_queue_message(struct ap_queue *aq, struct ap_message *ap_msg);
void ap_cancel_message(struct ap_queue *aq, struct ap_message *ap_msg);
void ap_flush_queue(struct ap_queue *aq);
diff --git a/drivers/s390/crypto/ap_card.c b/drivers/s390/crypto/ap_card.c
index 6588713319ba..d98bdd28d23e 100644
--- a/drivers/s390/crypto/ap_card.c
+++ b/drivers/s390/crypto/ap_card.c
@@ -12,6 +12,7 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <asm/facility.h>
+#include <asm/sclp.h>
#include "ap_bus.h"
@@ -139,6 +140,38 @@ static ssize_t modalias_show(struct device *dev,
static DEVICE_ATTR_RO(modalias);
+static ssize_t config_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct ap_card *ac = to_ap_card(dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n", ac->config ? 1 : 0);
+}
+
+static ssize_t config_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int rc = 0, cfg;
+ struct ap_card *ac = to_ap_card(dev);
+
+ if (sscanf(buf, "%d\n", &cfg) != 1 || cfg < 0 || cfg > 1)
+ return -EINVAL;
+
+ if (cfg && !ac->config)
+ rc = sclp_ap_configure(ac->id);
+ else if (!cfg && ac->config)
+ rc = sclp_ap_deconfigure(ac->id);
+ if (rc)
+ return rc;
+
+ ac->config = cfg ? true : false;
+
+ return count;
+}
+
+static DEVICE_ATTR_RW(config);
+
static struct attribute *ap_card_dev_attrs[] = {
&dev_attr_hwtype.attr,
&dev_attr_raw_hwtype.attr,
@@ -148,6 +181,7 @@ static struct attribute *ap_card_dev_attrs[] = {
&dev_attr_requestq_count.attr,
&dev_attr_pendingq_count.attr,
&dev_attr_modalias.attr,
+ &dev_attr_config.attr,
NULL
};
diff --git a/drivers/s390/crypto/ap_debug.h b/drivers/s390/crypto/ap_debug.h
index dc675eb5aef6..34b0350d0b1a 100644
--- a/drivers/s390/crypto/ap_debug.h
+++ b/drivers/s390/crypto/ap_debug.h
@@ -20,6 +20,14 @@
#define AP_DBF(...) \
debug_sprintf_event(ap_dbf_info, ##__VA_ARGS__)
+#define AP_DBF_ERR(...) \
+ debug_sprintf_event(ap_dbf_info, DBF_ERR, ##__VA_ARGS__)
+#define AP_DBF_WARN(...) \
+ debug_sprintf_event(ap_dbf_info, DBF_WARN, ##__VA_ARGS__)
+#define AP_DBF_INFO(...) \
+ debug_sprintf_event(ap_dbf_info, DBF_INFO, ##__VA_ARGS__)
+#define AP_DBF_DBG(...) \
+ debug_sprintf_event(ap_dbf_info, DBF_DEBUG, ##__VA_ARGS__)
extern debug_info_t *ap_dbf_info;
diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c
index 688ebebbf98c..ecefc25eff0c 100644
--- a/drivers/s390/crypto/ap_queue.c
+++ b/drivers/s390/crypto/ap_queue.c
@@ -195,7 +195,11 @@ static enum ap_sm_wait ap_sm_read(struct ap_queue *aq)
aq->sm_state = AP_SM_STATE_IDLE;
return AP_SM_WAIT_NONE;
default:
- aq->sm_state = AP_SM_STATE_BORKED;
+ aq->dev_state = AP_DEV_STATE_ERROR;
+ aq->last_err_rc = status.response_code;
+ AP_DBF_WARN("%s RC 0x%02x on 0x%02x.%04x -> AP_DEV_STATE_ERROR\n",
+ __func__, status.response_code,
+ AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
return AP_SM_WAIT_NONE;
}
}
@@ -210,12 +214,20 @@ static enum ap_sm_wait ap_sm_write(struct ap_queue *aq)
{
struct ap_queue_status status;
struct ap_message *ap_msg;
+ ap_qid_t qid = aq->qid;
if (aq->requestq_count <= 0)
return AP_SM_WAIT_NONE;
/* Start the next request on the queue. */
ap_msg = list_entry(aq->requestq.next, struct ap_message, list);
- status = __ap_send(aq->qid, ap_msg->psmid,
+#ifdef CONFIG_ZCRYPT_DEBUG
+ if (ap_msg->fi.action == AP_FI_ACTION_NQAP_QID_INVAL) {
+ AP_DBF_WARN("%s fi cmd 0x%04x: forcing invalid qid 0xFF00\n",
+ __func__, ap_msg->fi.cmd);
+ qid = 0xFF00;
+ }
+#endif
+ status = __ap_send(qid, ap_msg->psmid,
ap_msg->msg, ap_msg->len,
ap_msg->flags & AP_MSG_FLAG_SPECIAL);
switch (status.response_code) {
@@ -237,6 +249,9 @@ static enum ap_sm_wait ap_sm_write(struct ap_queue *aq)
case AP_RESPONSE_RESET_IN_PROGRESS:
aq->sm_state = AP_SM_STATE_RESET_WAIT;
return AP_SM_WAIT_TIMEOUT;
+ case AP_RESPONSE_INVALID_DOMAIN:
+ AP_DBF(DBF_WARN, "AP_RESPONSE_INVALID_DOMAIN on NQAP\n");
+ fallthrough;
case AP_RESPONSE_MESSAGE_TOO_BIG:
case AP_RESPONSE_REQ_FAC_NOT_INST:
list_del_init(&ap_msg->list);
@@ -245,7 +260,11 @@ static enum ap_sm_wait ap_sm_write(struct ap_queue *aq)
ap_msg->receive(aq, ap_msg, NULL);
return AP_SM_WAIT_AGAIN;
default:
- aq->sm_state = AP_SM_STATE_BORKED;
+ aq->dev_state = AP_DEV_STATE_ERROR;
+ aq->last_err_rc = status.response_code;
+ AP_DBF_WARN("%s RC 0x%02x on 0x%02x.%04x -> AP_DEV_STATE_ERROR\n",
+ __func__, status.response_code,
+ AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
return AP_SM_WAIT_NONE;
}
}
@@ -278,13 +297,12 @@ static enum ap_sm_wait ap_sm_reset(struct ap_queue *aq)
aq->sm_state = AP_SM_STATE_RESET_WAIT;
aq->interrupt = AP_INTR_DISABLED;
return AP_SM_WAIT_TIMEOUT;
- case AP_RESPONSE_BUSY:
- return AP_SM_WAIT_TIMEOUT;
- case AP_RESPONSE_Q_NOT_AVAIL:
- case AP_RESPONSE_DECONFIGURED:
- case AP_RESPONSE_CHECKSTOPPED:
default:
- aq->sm_state = AP_SM_STATE_BORKED;
+ aq->dev_state = AP_DEV_STATE_ERROR;
+ aq->last_err_rc = status.response_code;
+ AP_DBF_WARN("%s RC 0x%02x on 0x%02x.%04x -> AP_DEV_STATE_ERROR\n",
+ __func__, status.response_code,
+ AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
return AP_SM_WAIT_NONE;
}
}
@@ -323,7 +341,11 @@ static enum ap_sm_wait ap_sm_reset_wait(struct ap_queue *aq)
case AP_RESPONSE_DECONFIGURED:
case AP_RESPONSE_CHECKSTOPPED:
default:
- aq->sm_state = AP_SM_STATE_BORKED;
+ aq->dev_state = AP_DEV_STATE_ERROR;
+ aq->last_err_rc = status.response_code;
+ AP_DBF_WARN("%s RC 0x%02x on 0x%02x.%04x -> AP_DEV_STATE_ERROR\n",
+ __func__, status.response_code,
+ AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
return AP_SM_WAIT_NONE;
}
}
@@ -360,7 +382,11 @@ static enum ap_sm_wait ap_sm_setirq_wait(struct ap_queue *aq)
case AP_RESPONSE_NO_PENDING_REPLY:
return AP_SM_WAIT_TIMEOUT;
default:
- aq->sm_state = AP_SM_STATE_BORKED;
+ aq->dev_state = AP_DEV_STATE_ERROR;
+ aq->last_err_rc = status.response_code;
+ AP_DBF_WARN("%s RC 0x%02x on 0x%02x.%04x -> AP_DEV_STATE_ERROR\n",
+ __func__, status.response_code,
+ AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
return AP_SM_WAIT_NONE;
}
}
@@ -393,23 +419,14 @@ static ap_func_t *ap_jumptable[NR_AP_SM_STATES][NR_AP_SM_EVENTS] = {
[AP_SM_EVENT_POLL] = ap_sm_read,
[AP_SM_EVENT_TIMEOUT] = ap_sm_reset,
},
- [AP_SM_STATE_REMOVE] = {
- [AP_SM_EVENT_POLL] = ap_sm_nop,
- [AP_SM_EVENT_TIMEOUT] = ap_sm_nop,
- },
- [AP_SM_STATE_UNBOUND] = {
- [AP_SM_EVENT_POLL] = ap_sm_nop,
- [AP_SM_EVENT_TIMEOUT] = ap_sm_nop,
- },
- [AP_SM_STATE_BORKED] = {
- [AP_SM_EVENT_POLL] = ap_sm_nop,
- [AP_SM_EVENT_TIMEOUT] = ap_sm_nop,
- },
};
enum ap_sm_wait ap_sm_event(struct ap_queue *aq, enum ap_sm_event event)
{
- return ap_jumptable[aq->sm_state][event](aq);
+ if (aq->dev_state > AP_DEV_STATE_UNINITIATED)
+ return ap_jumptable[aq->sm_state][event](aq);
+ else
+ return AP_SM_WAIT_NONE;
}
enum ap_sm_wait ap_sm_event_loop(struct ap_queue *aq, enum ap_sm_event event)
@@ -429,12 +446,20 @@ static ssize_t request_count_show(struct device *dev,
char *buf)
{
struct ap_queue *aq = to_ap_queue(dev);
+ bool valid = false;
u64 req_cnt;
spin_lock_bh(&aq->lock);
- req_cnt = aq->total_request_count;
+ if (aq->dev_state > AP_DEV_STATE_UNINITIATED) {
+ req_cnt = aq->total_request_count;
+ valid = true;
+ }
spin_unlock_bh(&aq->lock);
- return scnprintf(buf, PAGE_SIZE, "%llu\n", req_cnt);
+
+ if (valid)
+ return scnprintf(buf, PAGE_SIZE, "%llu\n", req_cnt);
+ else
+ return scnprintf(buf, PAGE_SIZE, "-\n");
}
static ssize_t request_count_store(struct device *dev,
@@ -459,7 +484,8 @@ static ssize_t requestq_count_show(struct device *dev,
unsigned int reqq_cnt = 0;
spin_lock_bh(&aq->lock);
- reqq_cnt = aq->requestq_count;
+ if (aq->dev_state > AP_DEV_STATE_UNINITIATED)
+ reqq_cnt = aq->requestq_count;
spin_unlock_bh(&aq->lock);
return scnprintf(buf, PAGE_SIZE, "%d\n", reqq_cnt);
}
@@ -473,7 +499,8 @@ static ssize_t pendingq_count_show(struct device *dev,
unsigned int penq_cnt = 0;
spin_lock_bh(&aq->lock);
- penq_cnt = aq->pendingq_count;
+ if (aq->dev_state > AP_DEV_STATE_UNINITIATED)
+ penq_cnt = aq->pendingq_count;
spin_unlock_bh(&aq->lock);
return scnprintf(buf, PAGE_SIZE, "%d\n", penq_cnt);
}
@@ -542,12 +569,138 @@ static ssize_t interrupt_show(struct device *dev,
static DEVICE_ATTR_RO(interrupt);
+static ssize_t config_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct ap_queue *aq = to_ap_queue(dev);
+ int rc;
+
+ spin_lock_bh(&aq->lock);
+ rc = scnprintf(buf, PAGE_SIZE, "%d\n", aq->config ? 1 : 0);
+ spin_unlock_bh(&aq->lock);
+ return rc;
+}
+
+static DEVICE_ATTR_RO(config);
+
+#ifdef CONFIG_ZCRYPT_DEBUG
+static ssize_t states_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct ap_queue *aq = to_ap_queue(dev);
+ int rc = 0;
+
+ spin_lock_bh(&aq->lock);
+ /* queue device state */
+ switch (aq->dev_state) {
+ case AP_DEV_STATE_UNINITIATED:
+ rc = scnprintf(buf, PAGE_SIZE, "UNINITIATED\n");
+ break;
+ case AP_DEV_STATE_OPERATING:
+ rc = scnprintf(buf, PAGE_SIZE, "OPERATING");
+ break;
+ case AP_DEV_STATE_SHUTDOWN:
+ rc = scnprintf(buf, PAGE_SIZE, "SHUTDOWN");
+ break;
+ case AP_DEV_STATE_ERROR:
+ rc = scnprintf(buf, PAGE_SIZE, "ERROR");
+ break;
+ default:
+ rc = scnprintf(buf, PAGE_SIZE, "UNKNOWN");
+ }
+ /* state machine state */
+ if (aq->dev_state) {
+ switch (aq->sm_state) {
+ case AP_SM_STATE_RESET_START:
+ rc += scnprintf(buf + rc, PAGE_SIZE - rc,
+ " [RESET_START]\n");
+ break;
+ case AP_SM_STATE_RESET_WAIT:
+ rc += scnprintf(buf + rc, PAGE_SIZE - rc,
+ " [RESET_WAIT]\n");
+ break;
+ case AP_SM_STATE_SETIRQ_WAIT:
+ rc += scnprintf(buf + rc, PAGE_SIZE - rc,
+ " [SETIRQ_WAIT]\n");
+ break;
+ case AP_SM_STATE_IDLE:
+ rc += scnprintf(buf + rc, PAGE_SIZE - rc,
+ " [IDLE]\n");
+ break;
+ case AP_SM_STATE_WORKING:
+ rc += scnprintf(buf + rc, PAGE_SIZE - rc,
+ " [WORKING]\n");
+ break;
+ case AP_SM_STATE_QUEUE_FULL:
+ rc += scnprintf(buf + rc, PAGE_SIZE - rc,
+ " [FULL]\n");
+ break;
+ default:
+ rc += scnprintf(buf + rc, PAGE_SIZE - rc,
+ " [UNKNOWN]\n");
+ }
+ }
+ spin_unlock_bh(&aq->lock);
+
+ return rc;
+}
+static DEVICE_ATTR_RO(states);
+
+static ssize_t last_err_rc_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct ap_queue *aq = to_ap_queue(dev);
+ int rc;
+
+ spin_lock_bh(&aq->lock);
+ rc = aq->last_err_rc;
+ spin_unlock_bh(&aq->lock);
+
+ switch (rc) {
+ case AP_RESPONSE_NORMAL:
+ return scnprintf(buf, PAGE_SIZE, "NORMAL\n");
+ case AP_RESPONSE_Q_NOT_AVAIL:
+ return scnprintf(buf, PAGE_SIZE, "Q_NOT_AVAIL\n");
+ case AP_RESPONSE_RESET_IN_PROGRESS:
+ return scnprintf(buf, PAGE_SIZE, "RESET_IN_PROGRESS\n");
+ case AP_RESPONSE_DECONFIGURED:
+ return scnprintf(buf, PAGE_SIZE, "DECONFIGURED\n");
+ case AP_RESPONSE_CHECKSTOPPED:
+ return scnprintf(buf, PAGE_SIZE, "CHECKSTOPPED\n");
+ case AP_RESPONSE_BUSY:
+ return scnprintf(buf, PAGE_SIZE, "BUSY\n");
+ case AP_RESPONSE_INVALID_ADDRESS:
+ return scnprintf(buf, PAGE_SIZE, "INVALID_ADDRESS\n");
+ case AP_RESPONSE_OTHERWISE_CHANGED:
+ return scnprintf(buf, PAGE_SIZE, "OTHERWISE_CHANGED\n");
+ case AP_RESPONSE_Q_FULL:
+ return scnprintf(buf, PAGE_SIZE, "Q_FULL/NO_PENDING_REPLY\n");
+ case AP_RESPONSE_INDEX_TOO_BIG:
+ return scnprintf(buf, PAGE_SIZE, "INDEX_TOO_BIG\n");
+ case AP_RESPONSE_NO_FIRST_PART:
+ return scnprintf(buf, PAGE_SIZE, "NO_FIRST_PART\n");
+ case AP_RESPONSE_MESSAGE_TOO_BIG:
+ return scnprintf(buf, PAGE_SIZE, "MESSAGE_TOO_BIG\n");
+ case AP_RESPONSE_REQ_FAC_NOT_INST:
+ return scnprintf(buf, PAGE_SIZE, "REQ_FAC_NOT_INST\n");
+ default:
+ return scnprintf(buf, PAGE_SIZE, "response code %d\n", rc);
+ }
+}
+static DEVICE_ATTR_RO(last_err_rc);
+#endif
+
static struct attribute *ap_queue_dev_attrs[] = {
&dev_attr_request_count.attr,
&dev_attr_requestq_count.attr,
&dev_attr_pendingq_count.attr,
&dev_attr_reset.attr,
&dev_attr_interrupt.attr,
+ &dev_attr_config.attr,
+#ifdef CONFIG_ZCRYPT_DEBUG
+ &dev_attr_states.attr,
+ &dev_attr_last_err_rc.attr,
+#endif
NULL
};
@@ -587,7 +740,6 @@ struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type)
aq->ap_dev.device.type = &ap_queue_type;
aq->ap_dev.device_type = device_type;
aq->qid = qid;
- aq->sm_state = AP_SM_STATE_UNBOUND;
aq->interrupt = AP_INTR_DISABLED;
spin_lock_init(&aq->lock);
INIT_LIST_HEAD(&aq->pendingq);
@@ -612,22 +764,30 @@ EXPORT_SYMBOL(ap_queue_init_reply);
* @aq: The AP device to queue the message to
* @ap_msg: The message that is to be added
*/
-void ap_queue_message(struct ap_queue *aq, struct ap_message *ap_msg)
+int ap_queue_message(struct ap_queue *aq, struct ap_message *ap_msg)
{
- /* For asynchronous message handling a valid receive-callback
- * is required.
- */
+ int rc = 0;
+
+ /* msg needs to have a valid receive-callback */
BUG_ON(!ap_msg->receive);
spin_lock_bh(&aq->lock);
- /* Queue the message. */
- list_add_tail(&ap_msg->list, &aq->requestq);
- aq->requestq_count++;
- aq->total_request_count++;
- atomic64_inc(&aq->card->total_request_count);
+
+ /* only allow to queue new messages if device state is ok */
+ if (aq->dev_state == AP_DEV_STATE_OPERATING) {
+ list_add_tail(&ap_msg->list, &aq->requestq);
+ aq->requestq_count++;
+ aq->total_request_count++;
+ atomic64_inc(&aq->card->total_request_count);
+ } else
+ rc = -ENODEV;
+
/* Send/receive as many request from the queue as possible. */
ap_wait(ap_sm_event_loop(aq, AP_SM_EVENT_POLL));
+
spin_unlock_bh(&aq->lock);
+
+ return rc;
}
EXPORT_SYMBOL(ap_queue_message);
@@ -698,8 +858,8 @@ void ap_queue_prepare_remove(struct ap_queue *aq)
spin_lock_bh(&aq->lock);
/* flush queue */
__ap_flush_queue(aq);
- /* set REMOVE state to prevent new messages are queued in */
- aq->sm_state = AP_SM_STATE_REMOVE;
+ /* move queue device state to SHUTDOWN in progress */
+ aq->dev_state = AP_DEV_STATE_SHUTDOWN;
spin_unlock_bh(&aq->lock);
del_timer_sync(&aq->timeout);
}
@@ -707,21 +867,21 @@ void ap_queue_prepare_remove(struct ap_queue *aq)
void ap_queue_remove(struct ap_queue *aq)
{
/*
- * all messages have been flushed and the state is
- * AP_SM_STATE_REMOVE. Now reset with zero which also
- * clears the irq registration and move the state
- * to AP_SM_STATE_UNBOUND to signal that this queue
- * is not used by any driver currently.
+ * all messages have been flushed and the device state
+ * is SHUTDOWN. Now reset with zero which also clears
+ * the irq registration and move the device state
+ * to the initial value AP_DEV_STATE_UNINITIATED.
*/
spin_lock_bh(&aq->lock);
ap_zapq(aq->qid);
- aq->sm_state = AP_SM_STATE_UNBOUND;
+ aq->dev_state = AP_DEV_STATE_UNINITIATED;
spin_unlock_bh(&aq->lock);
}
void ap_queue_init_state(struct ap_queue *aq)
{
spin_lock_bh(&aq->lock);
+ aq->dev_state = AP_DEV_STATE_OPERATING;
aq->sm_state = AP_SM_STATE_RESET_START;
ap_wait(ap_sm_event(aq, AP_SM_EVENT_POLL));
spin_unlock_bh(&aq->lock);
diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
index 5896e5282a4e..99cb60ea663d 100644
--- a/drivers/s390/crypto/pkey_api.c
+++ b/drivers/s390/crypto/pkey_api.c
@@ -31,8 +31,9 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("IBM Corporation");
MODULE_DESCRIPTION("s390 protected key interface");
-#define KEYBLOBBUFSIZE 8192 /* key buffer size used for internal processing */
-#define MAXAPQNSINLIST 64 /* max 64 apqns within a apqn list */
+#define KEYBLOBBUFSIZE 8192 /* key buffer size used for internal processing */
+#define PROTKEYBLOBBUFSIZE 256 /* protected key buffer size used internal */
+#define MAXAPQNSINLIST 64 /* max 64 apqns within a apqn list */
/* mask of available pckmo subfunctions, fetched once at module init */
static cpacf_mask_t pckmo_functions;
@@ -237,8 +238,9 @@ static int pkey_ep11key2pkey(const u8 *key, struct pkey_protkey *pkey)
for (rc = -ENODEV, i = 0; i < nr_apqns; i++) {
card = apqns[i] >> 16;
dom = apqns[i] & 0xFFFF;
- rc = ep11_key2protkey(card, dom, key, kb->head.len,
- pkey->protkey, &pkey->len, &pkey->type);
+ pkey->len = sizeof(pkey->protkey);
+ rc = ep11_kblob2protkey(card, dom, key, kb->head.len,
+ pkey->protkey, &pkey->len, &pkey->type);
if (rc == 0)
break;
}
@@ -449,15 +451,21 @@ static int pkey_nonccatok2pkey(const u8 *key, u32 keylen,
break;
}
case TOKVER_EP11_AES: {
- if (keylen < MINEP11AESKEYBLOBSIZE)
- goto out;
/* check ep11 key for exportable as protected key */
- rc = ep11_check_aeskeyblob(debug_info, 3, key, 0, 1);
+ rc = ep11_check_aes_key(debug_info, 3, key, keylen, 1);
if (rc)
goto out;
rc = pkey_ep11key2pkey(key, protkey);
break;
}
+ case TOKVER_EP11_AES_WITH_HEADER:
+ /* check ep11 key with header for exportable as protected key */
+ rc = ep11_check_aes_key_with_hdr(debug_info, 3, key, keylen, 1);
+ if (rc)
+ goto out;
+ rc = pkey_ep11key2pkey(key + sizeof(struct ep11kblob_header),
+ protkey);
+ break;
default:
DEBUG_ERR("%s unknown/unsupported non-CCA token version %d\n",
__func__, hdr->version);
@@ -661,13 +669,14 @@ static int pkey_verifykey2(const u8 *key, size_t keylen,
*ksize = (enum pkey_key_size) t->bitsize;
rc = cca_findcard2(&_apqns, &_nr_apqns, *cardnr, *domain,
- ZCRYPT_CEX3C, t->mkvp, 0, 1);
+ ZCRYPT_CEX3C, AES_MK_SET, t->mkvp, 0, 1);
if (rc == 0 && flags)
*flags = PKEY_FLAGS_MATCH_CUR_MKVP;
if (rc == -ENODEV) {
rc = cca_findcard2(&_apqns, &_nr_apqns,
*cardnr, *domain,
- ZCRYPT_CEX3C, 0, t->mkvp, 1);
+ ZCRYPT_CEX3C, AES_MK_SET,
+ 0, t->mkvp, 1);
if (rc == 0 && flags)
*flags = PKEY_FLAGS_MATCH_ALT_MKVP;
}
@@ -697,13 +706,14 @@ static int pkey_verifykey2(const u8 *key, size_t keylen,
}
rc = cca_findcard2(&_apqns, &_nr_apqns, *cardnr, *domain,
- ZCRYPT_CEX6, t->mkvp0, 0, 1);
+ ZCRYPT_CEX6, AES_MK_SET, t->mkvp0, 0, 1);
if (rc == 0 && flags)
*flags = PKEY_FLAGS_MATCH_CUR_MKVP;
if (rc == -ENODEV) {
rc = cca_findcard2(&_apqns, &_nr_apqns,
*cardnr, *domain,
- ZCRYPT_CEX6, 0, t->mkvp0, 1);
+ ZCRYPT_CEX6, AES_MK_SET,
+ 0, t->mkvp0, 1);
if (rc == 0 && flags)
*flags = PKEY_FLAGS_MATCH_ALT_MKVP;
}
@@ -717,7 +727,7 @@ static int pkey_verifykey2(const u8 *key, size_t keylen,
&& hdr->version == TOKVER_EP11_AES) {
struct ep11keyblob *kb = (struct ep11keyblob *)key;
- rc = ep11_check_aeskeyblob(debug_info, 3, key, 0, 1);
+ rc = ep11_check_aes_key(debug_info, 3, key, keylen, 1);
if (rc)
goto out;
if (ktype)
@@ -778,7 +788,7 @@ static int pkey_keyblob2pkey2(const struct pkey_apqn *apqns, size_t nr_apqns,
if (hdr->version == TOKVER_EP11_AES) {
if (keylen < sizeof(struct ep11keyblob))
return -EINVAL;
- if (ep11_check_aeskeyblob(debug_info, 3, key, 0, 1))
+ if (ep11_check_aes_key(debug_info, 3, key, keylen, 1))
return -EINVAL;
} else {
return pkey_nonccatok2pkey(key, keylen, pkey);
@@ -804,9 +814,10 @@ static int pkey_keyblob2pkey2(const struct pkey_apqn *apqns, size_t nr_apqns,
else { /* EP11 AES secure key blob */
struct ep11keyblob *kb = (struct ep11keyblob *) key;
- rc = ep11_key2protkey(card, dom, key, kb->head.len,
- pkey->protkey, &pkey->len,
- &pkey->type);
+ pkey->len = sizeof(pkey->protkey);
+ rc = ep11_kblob2protkey(card, dom, key, kb->head.len,
+ pkey->protkey, &pkey->len,
+ &pkey->type);
}
if (rc == 0)
break;
@@ -825,7 +836,27 @@ static int pkey_apqns4key(const u8 *key, size_t keylen, u32 flags,
if (keylen < sizeof(struct keytoken_header) || flags == 0)
return -EINVAL;
- if (hdr->type == TOKTYPE_NON_CCA && hdr->version == TOKVER_EP11_AES) {
+ if (hdr->type == TOKTYPE_NON_CCA
+ && (hdr->version == TOKVER_EP11_AES_WITH_HEADER
+ || hdr->version == TOKVER_EP11_ECC_WITH_HEADER)
+ && is_ep11_keyblob(key + sizeof(struct ep11kblob_header))) {
+ int minhwtype = 0, api = 0;
+ struct ep11keyblob *kb = (struct ep11keyblob *)
+ (key + sizeof(struct ep11kblob_header));
+
+ if (flags != PKEY_FLAGS_MATCH_CUR_MKVP)
+ return -EINVAL;
+ if (kb->attr & EP11_BLOB_PKEY_EXTRACTABLE) {
+ minhwtype = ZCRYPT_CEX7;
+ api = EP11_API_V;
+ }
+ rc = ep11_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
+ minhwtype, api, kb->wkvp);
+ if (rc)
+ goto out;
+ } else if (hdr->type == TOKTYPE_NON_CCA
+ && hdr->version == TOKVER_EP11_AES
+ && is_ep11_keyblob(key)) {
int minhwtype = 0, api = 0;
struct ep11keyblob *kb = (struct ep11keyblob *) key;
@@ -863,7 +894,26 @@ static int pkey_apqns4key(const u8 *key, size_t keylen, u32 flags,
return -EINVAL;
}
rc = cca_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
- minhwtype, cur_mkvp, old_mkvp, 1);
+ minhwtype, AES_MK_SET,
+ cur_mkvp, old_mkvp, 1);
+ if (rc)
+ goto out;
+ } else if (hdr->type == TOKTYPE_CCA_INTERNAL_PKA) {
+ u64 cur_mkvp = 0, old_mkvp = 0;
+ struct eccprivkeytoken *t = (struct eccprivkeytoken *)key;
+
+ if (t->secid == 0x20) {
+ if (flags & PKEY_FLAGS_MATCH_CUR_MKVP)
+ cur_mkvp = t->mkvp;
+ if (flags & PKEY_FLAGS_MATCH_ALT_MKVP)
+ old_mkvp = t->mkvp;
+ } else {
+ /* unknown cca internal 2 token type */
+ return -EINVAL;
+ }
+ rc = cca_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
+ ZCRYPT_CEX7, APKA_MK_SET,
+ cur_mkvp, old_mkvp, 1);
if (rc)
goto out;
} else
@@ -900,10 +950,26 @@ static int pkey_apqns4keytype(enum pkey_key_type ktype,
if (ktype == PKEY_TYPE_CCA_CIPHER)
minhwtype = ZCRYPT_CEX6;
rc = cca_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
- minhwtype, cur_mkvp, old_mkvp, 1);
+ minhwtype, AES_MK_SET,
+ cur_mkvp, old_mkvp, 1);
if (rc)
goto out;
- } else if (ktype == PKEY_TYPE_EP11) {
+ } else if (ktype == PKEY_TYPE_CCA_ECC) {
+ u64 cur_mkvp = 0, old_mkvp = 0;
+
+ if (flags & PKEY_FLAGS_MATCH_CUR_MKVP)
+ cur_mkvp = *((u64 *) cur_mkvp);
+ if (flags & PKEY_FLAGS_MATCH_ALT_MKVP)
+ old_mkvp = *((u64 *) alt_mkvp);
+ rc = cca_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
+ ZCRYPT_CEX7, APKA_MK_SET,
+ cur_mkvp, old_mkvp, 1);
+ if (rc)
+ goto out;
+
+ } else if (ktype == PKEY_TYPE_EP11 ||
+ ktype == PKEY_TYPE_EP11_AES ||
+ ktype == PKEY_TYPE_EP11_ECC) {
u8 *wkvp = NULL;
if (flags & PKEY_FLAGS_MATCH_CUR_MKVP)
@@ -929,6 +995,111 @@ out:
return rc;
}
+static int pkey_keyblob2pkey3(const struct pkey_apqn *apqns, size_t nr_apqns,
+ const u8 *key, size_t keylen, u32 *protkeytype,
+ u8 *protkey, u32 *protkeylen)
+{
+ int i, card, dom, rc;
+ struct keytoken_header *hdr = (struct keytoken_header *)key;
+
+ /* check for at least one apqn given */
+ if (!apqns || !nr_apqns)
+ return -EINVAL;
+
+ if (keylen < sizeof(struct keytoken_header))
+ return -EINVAL;
+
+ if (hdr->type == TOKTYPE_NON_CCA
+ && hdr->version == TOKVER_EP11_AES_WITH_HEADER
+ && is_ep11_keyblob(key + sizeof(struct ep11kblob_header))) {
+ /* EP11 AES key blob with header */
+ if (ep11_check_aes_key_with_hdr(debug_info, 3, key, keylen, 1))
+ return -EINVAL;
+ } else if (hdr->type == TOKTYPE_NON_CCA
+ && hdr->version == TOKVER_EP11_ECC_WITH_HEADER
+ && is_ep11_keyblob(key + sizeof(struct ep11kblob_header))) {
+ /* EP11 ECC key blob with header */
+ if (ep11_check_ecc_key_with_hdr(debug_info, 3, key, keylen, 1))
+ return -EINVAL;
+ } else if (hdr->type == TOKTYPE_NON_CCA
+ && hdr->version == TOKVER_EP11_AES
+ && is_ep11_keyblob(key)) {
+ /* EP11 AES key blob with header in session field */
+ if (ep11_check_aes_key(debug_info, 3, key, keylen, 1))
+ return -EINVAL;
+ } else if (hdr->type == TOKTYPE_CCA_INTERNAL) {
+ if (hdr->version == TOKVER_CCA_AES) {
+ /* CCA AES data key */
+ if (keylen != sizeof(struct secaeskeytoken))
+ return -EINVAL;
+ if (cca_check_secaeskeytoken(debug_info, 3, key, 0))
+ return -EINVAL;
+ } else if (hdr->version == TOKVER_CCA_VLSC) {
+ /* CCA AES cipher key */
+ if (keylen < hdr->len || keylen > MAXCCAVLSCTOKENSIZE)
+ return -EINVAL;
+ if (cca_check_secaescipherkey(debug_info, 3, key, 0, 1))
+ return -EINVAL;
+ } else {
+ DEBUG_ERR("%s unknown CCA internal token version %d\n",
+ __func__, hdr->version);
+ return -EINVAL;
+ }
+ } else if (hdr->type == TOKTYPE_CCA_INTERNAL_PKA) {
+ /* CCA ECC (private) key */
+ if (keylen < sizeof(struct eccprivkeytoken))
+ return -EINVAL;
+ if (cca_check_sececckeytoken(debug_info, 3, key, keylen, 1))
+ return -EINVAL;
+ } else if (hdr->type == TOKTYPE_NON_CCA) {
+ struct pkey_protkey pkey;
+
+ rc = pkey_nonccatok2pkey(key, keylen, &pkey);
+ if (rc)
+ return rc;
+ memcpy(protkey, pkey.protkey, pkey.len);
+ *protkeylen = pkey.len;
+ *protkeytype = pkey.type;
+ return 0;
+ } else {
+ DEBUG_ERR("%s unknown/unsupported blob type %d\n",
+ __func__, hdr->type);
+ return -EINVAL;
+ }
+
+ /* simple try all apqns from the list */
+ for (rc = -ENODEV, i = 0; rc && i < nr_apqns; i++) {
+ card = apqns[i].card;
+ dom = apqns[i].domain;
+ if (hdr->type == TOKTYPE_NON_CCA
+ && (hdr->version == TOKVER_EP11_AES_WITH_HEADER
+ || hdr->version == TOKVER_EP11_ECC_WITH_HEADER)
+ && is_ep11_keyblob(key + sizeof(struct ep11kblob_header)))
+ rc = ep11_kblob2protkey(card, dom, key, hdr->len,
+ protkey, protkeylen, protkeytype);
+ else if (hdr->type == TOKTYPE_NON_CCA
+ && hdr->version == TOKVER_EP11_AES
+ && is_ep11_keyblob(key))
+ rc = ep11_kblob2protkey(card, dom, key, hdr->len,
+ protkey, protkeylen, protkeytype);
+ else if (hdr->type == TOKTYPE_CCA_INTERNAL &&
+ hdr->version == TOKVER_CCA_AES)
+ rc = cca_sec2protkey(card, dom, key, protkey,
+ protkeylen, protkeytype);
+ else if (hdr->type == TOKTYPE_CCA_INTERNAL &&
+ hdr->version == TOKVER_CCA_VLSC)
+ rc = cca_cipher2protkey(card, dom, key, protkey,
+ protkeylen, protkeytype);
+ else if (hdr->type == TOKTYPE_CCA_INTERNAL_PKA)
+ rc = cca_ecc2protkey(card, dom, key, protkey,
+ protkeylen, protkeytype);
+ else
+ return -EINVAL;
+ }
+
+ return rc;
+}
+
/*
* File io functions
*/
@@ -1329,6 +1500,55 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
kfree(apqns);
break;
}
+ case PKEY_KBLOB2PROTK3: {
+ struct pkey_kblob2pkey3 __user *utp = (void __user *) arg;
+ struct pkey_kblob2pkey3 ktp;
+ struct pkey_apqn *apqns = NULL;
+ u32 protkeylen = PROTKEYBLOBBUFSIZE;
+ u8 *kkey, *protkey;
+
+ if (copy_from_user(&ktp, utp, sizeof(ktp)))
+ return -EFAULT;
+ apqns = _copy_apqns_from_user(ktp.apqns, ktp.apqn_entries);
+ if (IS_ERR(apqns))
+ return PTR_ERR(apqns);
+ kkey = _copy_key_from_user(ktp.key, ktp.keylen);
+ if (IS_ERR(kkey)) {
+ kfree(apqns);
+ return PTR_ERR(kkey);
+ }
+ protkey = kmalloc(protkeylen, GFP_KERNEL);
+ if (!protkey) {
+ kfree(apqns);
+ kfree(kkey);
+ return -ENOMEM;
+ }
+ rc = pkey_keyblob2pkey3(apqns, ktp.apqn_entries, kkey,
+ ktp.keylen, &ktp.pkeytype,
+ protkey, &protkeylen);
+ DEBUG_DBG("%s pkey_keyblob2pkey3()=%d\n", __func__, rc);
+ kfree(apqns);
+ kfree(kkey);
+ if (rc) {
+ kfree(protkey);
+ break;
+ }
+ if (ktp.pkey && ktp.pkeylen) {
+ if (protkeylen > ktp.pkeylen) {
+ kfree(protkey);
+ return -EINVAL;
+ }
+ if (copy_to_user(ktp.pkey, protkey, protkeylen)) {
+ kfree(protkey);
+ return -EFAULT;
+ }
+ }
+ kfree(protkey);
+ ktp.pkeylen = protkeylen;
+ if (copy_to_user(utp, &ktp, sizeof(ktp)))
+ return -EFAULT;
+ break;
+ }
default:
/* unknown/unsupported ioctl cmd */
return -ENOTTY;
@@ -1589,7 +1809,7 @@ static ssize_t pkey_ccacipher_aes_attr_read(enum pkey_key_size keybits,
/* build a list of apqns able to generate an cipher key */
rc = cca_findcard2(&apqns, &nr_apqns, 0xFFFF, 0xFFFF,
- ZCRYPT_CEX6, 0, 0, 0);
+ ZCRYPT_CEX6, 0, 0, 0, 0);
if (rc)
return rc;
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index f314936b5462..f60f9fb25214 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -25,6 +25,7 @@
#include <linux/debugfs.h>
#include <linux/cdev.h>
#include <linux/ctype.h>
+#include <linux/capability.h>
#include <asm/debug.h>
#define CREATE_TRACE_POINTS
@@ -602,13 +603,13 @@ static inline bool zcrypt_card_compare(struct zcrypt_card *zc,
unsigned int pref_weight)
{
if (!pref_zc)
- return false;
+ return true;
weight += atomic_read(&zc->load);
pref_weight += atomic_read(&pref_zc->load);
if (weight == pref_weight)
- return atomic64_read(&zc->card->total_request_count) >
+ return atomic64_read(&zc->card->total_request_count) <
atomic64_read(&pref_zc->card->total_request_count);
- return weight > pref_weight;
+ return weight < pref_weight;
}
static inline bool zcrypt_queue_compare(struct zcrypt_queue *zq,
@@ -617,30 +618,39 @@ static inline bool zcrypt_queue_compare(struct zcrypt_queue *zq,
unsigned int pref_weight)
{
if (!pref_zq)
- return false;
+ return true;
weight += atomic_read(&zq->load);
pref_weight += atomic_read(&pref_zq->load);
if (weight == pref_weight)
- return zq->queue->total_request_count >
+ return zq->queue->total_request_count <
pref_zq->queue->total_request_count;
- return weight > pref_weight;
+ return weight < pref_weight;
}
/*
* zcrypt ioctls.
*/
static long zcrypt_rsa_modexpo(struct ap_perms *perms,
+ struct zcrypt_track *tr,
struct ica_rsa_modexpo *mex)
{
struct zcrypt_card *zc, *pref_zc;
struct zcrypt_queue *zq, *pref_zq;
- unsigned int weight = 0, pref_weight = 0;
+ struct ap_message ap_msg;
+ unsigned int wgt = 0, pref_wgt = 0;
unsigned int func_code;
- int qid = 0, rc = -ENODEV;
+ int cpen, qpen, qid = 0, rc = -ENODEV;
struct module *mod;
trace_s390_zcrypt_req(mex, TP_ICARSAMODEXPO);
+ ap_init_message(&ap_msg);
+
+#ifdef CONFIG_ZCRYPT_DEBUG
+ if (tr && tr->fi.cmd)
+ ap_msg.fi.cmd = tr->fi.cmd;
+#endif
+
if (mex->outputdatalength < mex->inputdatalength) {
func_code = 0;
rc = -EINVAL;
@@ -662,8 +672,9 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms,
pref_zq = NULL;
spin_lock(&zcrypt_list_lock);
for_each_zcrypt_card(zc) {
- /* Check for online accelarator and CCA cards */
- if (!zc->online || !(zc->card->functions & 0x18000000))
+ /* Check for useable accelarator or CCA card */
+ if (!zc->online || !zc->card->config ||
+ !(zc->card->functions & 0x18000000))
continue;
/* Check for size limits */
if (zc->min_mod_size > mex->inputdatalength ||
@@ -673,26 +684,35 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms,
if (!zcrypt_check_card(perms, zc->card->id))
continue;
/* get weight index of the card device */
- weight = zc->speed_rating[func_code];
- if (zcrypt_card_compare(zc, pref_zc, weight, pref_weight))
+ wgt = zc->speed_rating[func_code];
+ /* penalty if this msg was previously sent via this card */
+ cpen = (tr && tr->again_counter && tr->last_qid &&
+ AP_QID_CARD(tr->last_qid) == zc->card->id) ?
+ TRACK_AGAIN_CARD_WEIGHT_PENALTY : 0;
+ if (!zcrypt_card_compare(zc, pref_zc, wgt + cpen, pref_wgt))
continue;
for_each_zcrypt_queue(zq, zc) {
- /* check if device is online and eligible */
- if (!zq->online || !zq->ops->rsa_modexpo)
+ /* check if device is useable and eligible */
+ if (!zq->online || !zq->ops->rsa_modexpo ||
+ !zq->queue->config)
continue;
/* check if device node has admission for this queue */
if (!zcrypt_check_queue(perms,
AP_QID_QUEUE(zq->queue->qid)))
continue;
- if (zcrypt_queue_compare(zq, pref_zq,
- weight, pref_weight))
+ /* penalty if the msg was previously sent at this qid */
+ qpen = (tr && tr->again_counter && tr->last_qid &&
+ tr->last_qid == zq->queue->qid) ?
+ TRACK_AGAIN_QUEUE_WEIGHT_PENALTY : 0;
+ if (!zcrypt_queue_compare(zq, pref_zq,
+ wgt + cpen + qpen, pref_wgt))
continue;
pref_zc = zc;
pref_zq = zq;
- pref_weight = weight;
+ pref_wgt = wgt + cpen + qpen;
}
}
- pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight);
+ pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, wgt);
spin_unlock(&zcrypt_list_lock);
if (!pref_zq) {
@@ -701,30 +721,44 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms,
}
qid = pref_zq->queue->qid;
- rc = pref_zq->ops->rsa_modexpo(pref_zq, mex);
+ rc = pref_zq->ops->rsa_modexpo(pref_zq, mex, &ap_msg);
spin_lock(&zcrypt_list_lock);
- zcrypt_drop_queue(pref_zc, pref_zq, mod, weight);
+ zcrypt_drop_queue(pref_zc, pref_zq, mod, wgt);
spin_unlock(&zcrypt_list_lock);
out:
+ ap_release_message(&ap_msg);
+ if (tr) {
+ tr->last_rc = rc;
+ tr->last_qid = qid;
+ }
trace_s390_zcrypt_rep(mex, func_code, rc,
AP_QID_CARD(qid), AP_QID_QUEUE(qid));
return rc;
}
static long zcrypt_rsa_crt(struct ap_perms *perms,
+ struct zcrypt_track *tr,
struct ica_rsa_modexpo_crt *crt)
{
struct zcrypt_card *zc, *pref_zc;
struct zcrypt_queue *zq, *pref_zq;
- unsigned int weight = 0, pref_weight = 0;
+ struct ap_message ap_msg;
+ unsigned int wgt = 0, pref_wgt = 0;
unsigned int func_code;
- int qid = 0, rc = -ENODEV;
+ int cpen, qpen, qid = 0, rc = -ENODEV;
struct module *mod;
trace_s390_zcrypt_req(crt, TP_ICARSACRT);
+ ap_init_message(&ap_msg);
+
+#ifdef CONFIG_ZCRYPT_DEBUG
+ if (tr && tr->fi.cmd)
+ ap_msg.fi.cmd = tr->fi.cmd;
+#endif
+
if (crt->outputdatalength < crt->inputdatalength) {
func_code = 0;
rc = -EINVAL;
@@ -746,8 +780,9 @@ static long zcrypt_rsa_crt(struct ap_perms *perms,
pref_zq = NULL;
spin_lock(&zcrypt_list_lock);
for_each_zcrypt_card(zc) {
- /* Check for online accelarator and CCA cards */
- if (!zc->online || !(zc->card->functions & 0x18000000))
+ /* Check for useable accelarator or CCA card */
+ if (!zc->online || !zc->card->config ||
+ !(zc->card->functions & 0x18000000))
continue;
/* Check for size limits */
if (zc->min_mod_size > crt->inputdatalength ||
@@ -757,26 +792,35 @@ static long zcrypt_rsa_crt(struct ap_perms *perms,
if (!zcrypt_check_card(perms, zc->card->id))
continue;
/* get weight index of the card device */
- weight = zc->speed_rating[func_code];
- if (zcrypt_card_compare(zc, pref_zc, weight, pref_weight))
+ wgt = zc->speed_rating[func_code];
+ /* penalty if this msg was previously sent via this card */
+ cpen = (tr && tr->again_counter && tr->last_qid &&
+ AP_QID_CARD(tr->last_qid) == zc->card->id) ?
+ TRACK_AGAIN_CARD_WEIGHT_PENALTY : 0;
+ if (!zcrypt_card_compare(zc, pref_zc, wgt + cpen, pref_wgt))
continue;
for_each_zcrypt_queue(zq, zc) {
- /* check if device is online and eligible */
- if (!zq->online || !zq->ops->rsa_modexpo_crt)
+ /* check if device is useable and eligible */
+ if (!zq->online || !zq->ops->rsa_modexpo_crt ||
+ !zq->queue->config)
continue;
/* check if device node has admission for this queue */
if (!zcrypt_check_queue(perms,
AP_QID_QUEUE(zq->queue->qid)))
continue;
- if (zcrypt_queue_compare(zq, pref_zq,
- weight, pref_weight))
+ /* penalty if the msg was previously sent at this qid */
+ qpen = (tr && tr->again_counter && tr->last_qid &&
+ tr->last_qid == zq->queue->qid) ?
+ TRACK_AGAIN_QUEUE_WEIGHT_PENALTY : 0;
+ if (!zcrypt_queue_compare(zq, pref_zq,
+ wgt + cpen + qpen, pref_wgt))
continue;
pref_zc = zc;
pref_zq = zq;
- pref_weight = weight;
+ pref_wgt = wgt + cpen + qpen;
}
}
- pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight);
+ pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, wgt);
spin_unlock(&zcrypt_list_lock);
if (!pref_zq) {
@@ -785,35 +829,52 @@ static long zcrypt_rsa_crt(struct ap_perms *perms,
}
qid = pref_zq->queue->qid;
- rc = pref_zq->ops->rsa_modexpo_crt(pref_zq, crt);
+ rc = pref_zq->ops->rsa_modexpo_crt(pref_zq, crt, &ap_msg);
spin_lock(&zcrypt_list_lock);
- zcrypt_drop_queue(pref_zc, pref_zq, mod, weight);
+ zcrypt_drop_queue(pref_zc, pref_zq, mod, wgt);
spin_unlock(&zcrypt_list_lock);
out:
+ ap_release_message(&ap_msg);
+ if (tr) {
+ tr->last_rc = rc;
+ tr->last_qid = qid;
+ }
trace_s390_zcrypt_rep(crt, func_code, rc,
AP_QID_CARD(qid), AP_QID_QUEUE(qid));
return rc;
}
-static long _zcrypt_send_cprb(struct ap_perms *perms,
+static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms,
+ struct zcrypt_track *tr,
struct ica_xcRB *xcRB)
{
struct zcrypt_card *zc, *pref_zc;
struct zcrypt_queue *zq, *pref_zq;
struct ap_message ap_msg;
- unsigned int weight = 0, pref_weight = 0;
+ unsigned int wgt = 0, pref_wgt = 0;
unsigned int func_code;
unsigned short *domain, tdom;
- int qid = 0, rc = -ENODEV;
+ int cpen, qpen, qid = 0, rc = -ENODEV;
struct module *mod;
trace_s390_zcrypt_req(xcRB, TB_ZSECSENDCPRB);
xcRB->status = 0;
ap_init_message(&ap_msg);
- rc = get_cprb_fc(xcRB, &ap_msg, &func_code, &domain);
+
+#ifdef CONFIG_ZCRYPT_DEBUG
+ if (tr && tr->fi.cmd)
+ ap_msg.fi.cmd = tr->fi.cmd;
+ if (tr && tr->fi.action == AP_FI_ACTION_CCA_AGENT_FF) {
+ ZCRYPT_DBF_WARN("%s fi cmd 0x%04x: forcing invalid agent_ID 'FF'\n",
+ __func__, tr->fi.cmd);
+ xcRB->agent_ID = 0x4646;
+ }
+#endif
+
+ rc = get_cprb_fc(userspace, xcRB, &ap_msg, &func_code, &domain);
if (rc)
goto out;
@@ -832,8 +893,9 @@ static long _zcrypt_send_cprb(struct ap_perms *perms,
pref_zq = NULL;
spin_lock(&zcrypt_list_lock);
for_each_zcrypt_card(zc) {
- /* Check for online CCA cards */
- if (!zc->online || !(zc->card->functions & 0x10000000))
+ /* Check for useable CCA card */
+ if (!zc->online || !zc->card->config ||
+ !(zc->card->functions & 0x10000000))
continue;
/* Check for user selected CCA card */
if (xcRB->user_defined != AUTOSELECT &&
@@ -843,13 +905,18 @@ static long _zcrypt_send_cprb(struct ap_perms *perms,
if (!zcrypt_check_card(perms, zc->card->id))
continue;
/* get weight index of the card device */
- weight = speed_idx_cca(func_code) * zc->speed_rating[SECKEY];
- if (zcrypt_card_compare(zc, pref_zc, weight, pref_weight))
+ wgt = speed_idx_cca(func_code) * zc->speed_rating[SECKEY];
+ /* penalty if this msg was previously sent via this card */
+ cpen = (tr && tr->again_counter && tr->last_qid &&
+ AP_QID_CARD(tr->last_qid) == zc->card->id) ?
+ TRACK_AGAIN_CARD_WEIGHT_PENALTY : 0;
+ if (!zcrypt_card_compare(zc, pref_zc, wgt + cpen, pref_wgt))
continue;
for_each_zcrypt_queue(zq, zc) {
- /* check if device is online and eligible */
+ /* check for device useable and eligible */
if (!zq->online ||
!zq->ops->send_cprb ||
+ !zq->queue->config ||
(tdom != AUTOSEL_DOM &&
tdom != AP_QID_QUEUE(zq->queue->qid)))
continue;
@@ -857,15 +924,19 @@ static long _zcrypt_send_cprb(struct ap_perms *perms,
if (!zcrypt_check_queue(perms,
AP_QID_QUEUE(zq->queue->qid)))
continue;
- if (zcrypt_queue_compare(zq, pref_zq,
- weight, pref_weight))
+ /* penalty if the msg was previously sent at this qid */
+ qpen = (tr && tr->again_counter && tr->last_qid &&
+ tr->last_qid == zq->queue->qid) ?
+ TRACK_AGAIN_QUEUE_WEIGHT_PENALTY : 0;
+ if (!zcrypt_queue_compare(zq, pref_zq,
+ wgt + cpen + qpen, pref_wgt))
continue;
pref_zc = zc;
pref_zq = zq;
- pref_weight = weight;
+ pref_wgt = wgt + cpen + qpen;
}
}
- pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight);
+ pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, wgt);
spin_unlock(&zcrypt_list_lock);
if (!pref_zq) {
@@ -878,14 +949,26 @@ static long _zcrypt_send_cprb(struct ap_perms *perms,
if (*domain == AUTOSEL_DOM)
*domain = AP_QID_QUEUE(qid);
- rc = pref_zq->ops->send_cprb(pref_zq, xcRB, &ap_msg);
+#ifdef CONFIG_ZCRYPT_DEBUG
+ if (tr && tr->fi.action == AP_FI_ACTION_CCA_DOM_INVAL) {
+ ZCRYPT_DBF_WARN("%s fi cmd 0x%04x: forcing invalid domain\n",
+ __func__, tr->fi.cmd);
+ *domain = 99;
+ }
+#endif
+
+ rc = pref_zq->ops->send_cprb(userspace, pref_zq, xcRB, &ap_msg);
spin_lock(&zcrypt_list_lock);
- zcrypt_drop_queue(pref_zc, pref_zq, mod, weight);
+ zcrypt_drop_queue(pref_zc, pref_zq, mod, wgt);
spin_unlock(&zcrypt_list_lock);
out:
ap_release_message(&ap_msg);
+ if (tr) {
+ tr->last_rc = rc;
+ tr->last_qid = qid;
+ }
trace_s390_zcrypt_rep(xcRB, func_code, rc,
AP_QID_CARD(qid), AP_QID_QUEUE(qid));
return rc;
@@ -893,7 +976,7 @@ out:
long zcrypt_send_cprb(struct ica_xcRB *xcRB)
{
- return _zcrypt_send_cprb(&ap_perms, xcRB);
+ return _zcrypt_send_cprb(false, &ap_perms, NULL, xcRB);
}
EXPORT_SYMBOL(zcrypt_send_cprb);
@@ -924,23 +1007,29 @@ static bool is_desired_ep11_queue(unsigned int dev_qid,
return false;
}
-static long _zcrypt_send_ep11_cprb(struct ap_perms *perms,
+static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms,
+ struct zcrypt_track *tr,
struct ep11_urb *xcrb)
{
struct zcrypt_card *zc, *pref_zc;
struct zcrypt_queue *zq, *pref_zq;
struct ep11_target_dev *targets;
unsigned short target_num;
- unsigned int weight = 0, pref_weight = 0;
+ unsigned int wgt = 0, pref_wgt = 0;
unsigned int func_code;
struct ap_message ap_msg;
- int qid = 0, rc = -ENODEV;
+ int cpen, qpen, qid = 0, rc = -ENODEV;
struct module *mod;
trace_s390_zcrypt_req(xcrb, TP_ZSENDEP11CPRB);
ap_init_message(&ap_msg);
+#ifdef CONFIG_ZCRYPT_DEBUG
+ if (tr && tr->fi.cmd)
+ ap_msg.fi.cmd = tr->fi.cmd;
+#endif
+
target_num = (unsigned short) xcrb->targets_num;
/* empty list indicates autoselect (all available targets) */
@@ -956,7 +1045,7 @@ static long _zcrypt_send_ep11_cprb(struct ap_perms *perms,
}
uptr = (struct ep11_target_dev __force __user *) xcrb->targets;
- if (copy_from_user(targets, uptr,
+ if (z_copy_from_user(userspace, targets, uptr,
target_num * sizeof(*targets))) {
func_code = 0;
rc = -EFAULT;
@@ -964,7 +1053,7 @@ static long _zcrypt_send_ep11_cprb(struct ap_perms *perms,
}
}
- rc = get_ep11cprb_fc(xcrb, &ap_msg, &func_code);
+ rc = get_ep11cprb_fc(userspace, xcrb, &ap_msg, &func_code);
if (rc)
goto out_free;
@@ -972,8 +1061,9 @@ static long _zcrypt_send_ep11_cprb(struct ap_perms *perms,
pref_zq = NULL;
spin_lock(&zcrypt_list_lock);
for_each_zcrypt_card(zc) {
- /* Check for online EP11 cards */
- if (!zc->online || !(zc->card->functions & 0x04000000))
+ /* Check for useable EP11 card */
+ if (!zc->online || !zc->card->config ||
+ !(zc->card->functions & 0x04000000))
continue;
/* Check for user selected EP11 card */
if (targets &&
@@ -983,13 +1073,18 @@ static long _zcrypt_send_ep11_cprb(struct ap_perms *perms,
if (!zcrypt_check_card(perms, zc->card->id))
continue;
/* get weight index of the card device */
- weight = speed_idx_ep11(func_code) * zc->speed_rating[SECKEY];
- if (zcrypt_card_compare(zc, pref_zc, weight, pref_weight))
+ wgt = speed_idx_ep11(func_code) * zc->speed_rating[SECKEY];
+ /* penalty if this msg was previously sent via this card */
+ cpen = (tr && tr->again_counter && tr->last_qid &&
+ AP_QID_CARD(tr->last_qid) == zc->card->id) ?
+ TRACK_AGAIN_CARD_WEIGHT_PENALTY : 0;
+ if (!zcrypt_card_compare(zc, pref_zc, wgt + cpen, pref_wgt))
continue;
for_each_zcrypt_queue(zq, zc) {
- /* check if device is online and eligible */
+ /* check if device is useable and eligible */
if (!zq->online ||
!zq->ops->send_ep11_cprb ||
+ !zq->queue->config ||
(targets &&
!is_desired_ep11_queue(zq->queue->qid,
target_num, targets)))
@@ -998,15 +1093,19 @@ static long _zcrypt_send_ep11_cprb(struct ap_perms *perms,
if (!zcrypt_check_queue(perms,
AP_QID_QUEUE(zq->queue->qid)))
continue;
- if (zcrypt_queue_compare(zq, pref_zq,
- weight, pref_weight))
+ /* penalty if the msg was previously sent at this qid */
+ qpen = (tr && tr->again_counter && tr->last_qid &&
+ tr->last_qid == zq->queue->qid) ?
+ TRACK_AGAIN_QUEUE_WEIGHT_PENALTY : 0;
+ if (!zcrypt_queue_compare(zq, pref_zq,
+ wgt + cpen + qpen, pref_wgt))
continue;
pref_zc = zc;
pref_zq = zq;
- pref_weight = weight;
+ pref_wgt = wgt + cpen + qpen;
}
}
- pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight);
+ pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, wgt);
spin_unlock(&zcrypt_list_lock);
if (!pref_zq) {
@@ -1015,16 +1114,20 @@ static long _zcrypt_send_ep11_cprb(struct ap_perms *perms,
}
qid = pref_zq->queue->qid;
- rc = pref_zq->ops->send_ep11_cprb(pref_zq, xcrb, &ap_msg);
+ rc = pref_zq->ops->send_ep11_cprb(userspace, pref_zq, xcrb, &ap_msg);
spin_lock(&zcrypt_list_lock);
- zcrypt_drop_queue(pref_zc, pref_zq, mod, weight);
+ zcrypt_drop_queue(pref_zc, pref_zq, mod, wgt);
spin_unlock(&zcrypt_list_lock);
out_free:
kfree(targets);
out:
ap_release_message(&ap_msg);
+ if (tr) {
+ tr->last_rc = rc;
+ tr->last_qid = qid;
+ }
trace_s390_zcrypt_rep(xcrb, func_code, rc,
AP_QID_CARD(qid), AP_QID_QUEUE(qid));
return rc;
@@ -1032,7 +1135,7 @@ out:
long zcrypt_send_ep11_cprb(struct ep11_urb *xcrb)
{
- return _zcrypt_send_ep11_cprb(&ap_perms, xcrb);
+ return _zcrypt_send_ep11_cprb(false, &ap_perms, NULL, xcrb);
}
EXPORT_SYMBOL(zcrypt_send_ep11_cprb);
@@ -1040,7 +1143,7 @@ static long zcrypt_rng(char *buffer)
{
struct zcrypt_card *zc, *pref_zc;
struct zcrypt_queue *zq, *pref_zq;
- unsigned int weight = 0, pref_weight = 0;
+ unsigned int wgt = 0, pref_wgt = 0;
unsigned int func_code;
struct ap_message ap_msg;
unsigned int domain;
@@ -1058,26 +1161,27 @@ static long zcrypt_rng(char *buffer)
pref_zq = NULL;
spin_lock(&zcrypt_list_lock);
for_each_zcrypt_card(zc) {
- /* Check for online CCA cards */
- if (!zc->online || !(zc->card->functions & 0x10000000))
+ /* Check for useable CCA card */
+ if (!zc->online || !zc->card->config ||
+ !(zc->card->functions & 0x10000000))
continue;
/* get weight index of the card device */
- weight = zc->speed_rating[func_code];
- if (zcrypt_card_compare(zc, pref_zc, weight, pref_weight))
+ wgt = zc->speed_rating[func_code];
+ if (!zcrypt_card_compare(zc, pref_zc, wgt, pref_wgt))
continue;
for_each_zcrypt_queue(zq, zc) {
- /* check if device is online and eligible */
- if (!zq->online || !zq->ops->rng)
+ /* check if device is useable and eligible */
+ if (!zq->online || !zq->ops->rng ||
+ !zq->queue->config)
continue;
- if (zcrypt_queue_compare(zq, pref_zq,
- weight, pref_weight))
+ if (!zcrypt_queue_compare(zq, pref_zq, wgt, pref_wgt))
continue;
pref_zc = zc;
pref_zq = zq;
- pref_weight = weight;
+ pref_wgt = wgt;
}
}
- pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight);
+ pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, wgt);
spin_unlock(&zcrypt_list_lock);
if (!pref_zq) {
@@ -1089,7 +1193,7 @@ static long zcrypt_rng(char *buffer)
rc = pref_zq->ops->rng(pref_zq, buffer, &ap_msg);
spin_lock(&zcrypt_list_lock);
- zcrypt_drop_queue(pref_zc, pref_zq, mod, weight);
+ zcrypt_drop_queue(pref_zc, pref_zq, mod, wgt);
spin_unlock(&zcrypt_list_lock);
out:
@@ -1301,19 +1405,39 @@ static int zcrypt_requestq_count(void)
static int icarsamodexpo_ioctl(struct ap_perms *perms, unsigned long arg)
{
int rc;
+ struct zcrypt_track tr;
struct ica_rsa_modexpo mex;
struct ica_rsa_modexpo __user *umex = (void __user *) arg;
+ memset(&tr, 0, sizeof(tr));
if (copy_from_user(&mex, umex, sizeof(mex)))
return -EFAULT;
+
+#ifdef CONFIG_ZCRYPT_DEBUG
+ if (mex.inputdatalength & (1U << 31)) {
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ tr.fi.cmd = (u16)(mex.inputdatalength >> 16);
+ }
+ mex.inputdatalength &= 0x0000FFFF;
+#endif
+
do {
- rc = zcrypt_rsa_modexpo(perms, &mex);
- } while (rc == -EAGAIN);
+ rc = zcrypt_rsa_modexpo(perms, &tr, &mex);
+ if (rc == -EAGAIN)
+ tr.again_counter++;
+#ifdef CONFIG_ZCRYPT_DEBUG
+ if (rc == -EAGAIN && (tr.fi.flags & AP_FI_FLAG_NO_RETRY))
+ break;
+#endif
+ } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
/* on failure: retry once again after a requested rescan */
if ((rc == -ENODEV) && (zcrypt_process_rescan()))
do {
- rc = zcrypt_rsa_modexpo(perms, &mex);
- } while (rc == -EAGAIN);
+ rc = zcrypt_rsa_modexpo(perms, &tr, &mex);
+ if (rc == -EAGAIN)
+ tr.again_counter++;
+ } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
if (rc) {
ZCRYPT_DBF(DBF_DEBUG, "ioctl ICARSAMODEXPO rc=%d\n", rc);
return rc;
@@ -1324,19 +1448,39 @@ static int icarsamodexpo_ioctl(struct ap_perms *perms, unsigned long arg)
static int icarsacrt_ioctl(struct ap_perms *perms, unsigned long arg)
{
int rc;
+ struct zcrypt_track tr;
struct ica_rsa_modexpo_crt crt;
struct ica_rsa_modexpo_crt __user *ucrt = (void __user *) arg;
+ memset(&tr, 0, sizeof(tr));
if (copy_from_user(&crt, ucrt, sizeof(crt)))
return -EFAULT;
+
+#ifdef CONFIG_ZCRYPT_DEBUG
+ if (crt.inputdatalength & (1U << 31)) {
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ tr.fi.cmd = (u16)(crt.inputdatalength >> 16);
+ }
+ crt.inputdatalength &= 0x0000FFFF;
+#endif
+
do {
- rc = zcrypt_rsa_crt(perms, &crt);
- } while (rc == -EAGAIN);
+ rc = zcrypt_rsa_crt(perms, &tr, &crt);
+ if (rc == -EAGAIN)
+ tr.again_counter++;
+#ifdef CONFIG_ZCRYPT_DEBUG
+ if (rc == -EAGAIN && (tr.fi.flags & AP_FI_FLAG_NO_RETRY))
+ break;
+#endif
+ } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
/* on failure: retry once again after a requested rescan */
if ((rc == -ENODEV) && (zcrypt_process_rescan()))
do {
- rc = zcrypt_rsa_crt(perms, &crt);
- } while (rc == -EAGAIN);
+ rc = zcrypt_rsa_crt(perms, &tr, &crt);
+ if (rc == -EAGAIN)
+ tr.again_counter++;
+ } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
if (rc) {
ZCRYPT_DBF(DBF_DEBUG, "ioctl ICARSACRT rc=%d\n", rc);
return rc;
@@ -1348,18 +1492,38 @@ static int zsecsendcprb_ioctl(struct ap_perms *perms, unsigned long arg)
{
int rc;
struct ica_xcRB xcRB;
+ struct zcrypt_track tr;
struct ica_xcRB __user *uxcRB = (void __user *) arg;
+ memset(&tr, 0, sizeof(tr));
if (copy_from_user(&xcRB, uxcRB, sizeof(xcRB)))
return -EFAULT;
+
+#ifdef CONFIG_ZCRYPT_DEBUG
+ if (xcRB.status & (1U << 31)) {
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ tr.fi.cmd = (u16)(xcRB.status >> 16);
+ }
+ xcRB.status &= 0x0000FFFF;
+#endif
+
do {
- rc = _zcrypt_send_cprb(perms, &xcRB);
- } while (rc == -EAGAIN);
+ rc = _zcrypt_send_cprb(true, perms, &tr, &xcRB);
+ if (rc == -EAGAIN)
+ tr.again_counter++;
+#ifdef CONFIG_ZCRYPT_DEBUG
+ if (rc == -EAGAIN && (tr.fi.flags & AP_FI_FLAG_NO_RETRY))
+ break;
+#endif
+ } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
/* on failure: retry once again after a requested rescan */
if ((rc == -ENODEV) && (zcrypt_process_rescan()))
do {
- rc = _zcrypt_send_cprb(perms, &xcRB);
- } while (rc == -EAGAIN);
+ rc = _zcrypt_send_cprb(true, perms, &tr, &xcRB);
+ if (rc == -EAGAIN)
+ tr.again_counter++;
+ } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
if (rc)
ZCRYPT_DBF(DBF_DEBUG, "ioctl ZSENDCPRB rc=%d status=0x%x\n",
rc, xcRB.status);
@@ -1372,18 +1536,38 @@ static int zsendep11cprb_ioctl(struct ap_perms *perms, unsigned long arg)
{
int rc;
struct ep11_urb xcrb;
+ struct zcrypt_track tr;
struct ep11_urb __user *uxcrb = (void __user *)arg;
+ memset(&tr, 0, sizeof(tr));
if (copy_from_user(&xcrb, uxcrb, sizeof(xcrb)))
return -EFAULT;
+
+#ifdef CONFIG_ZCRYPT_DEBUG
+ if (xcrb.req_len & (1ULL << 63)) {
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ tr.fi.cmd = (u16)(xcrb.req_len >> 48);
+ }
+ xcrb.req_len &= 0x0000FFFFFFFFFFFFULL;
+#endif
+
do {
- rc = _zcrypt_send_ep11_cprb(perms, &xcrb);
- } while (rc == -EAGAIN);
+ rc = _zcrypt_send_ep11_cprb(true, perms, &tr, &xcrb);
+ if (rc == -EAGAIN)
+ tr.again_counter++;
+#ifdef CONFIG_ZCRYPT_DEBUG
+ if (rc == -EAGAIN && (tr.fi.flags & AP_FI_FLAG_NO_RETRY))
+ break;
+#endif
+ } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
/* on failure: retry once again after a requested rescan */
if ((rc == -ENODEV) && (zcrypt_process_rescan()))
do {
- rc = _zcrypt_send_ep11_cprb(perms, &xcrb);
- } while (rc == -EAGAIN);
+ rc = _zcrypt_send_ep11_cprb(true, perms, &tr, &xcrb);
+ if (rc == -EAGAIN)
+ tr.again_counter++;
+ } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
if (rc)
ZCRYPT_DBF(DBF_DEBUG, "ioctl ZSENDEP11CPRB rc=%d\n", rc);
if (copy_to_user(uxcrb, &xcrb, sizeof(xcrb)))
@@ -1536,8 +1720,10 @@ static long trans_modexpo32(struct ap_perms *perms, struct file *filp,
struct compat_ica_rsa_modexpo __user *umex32 = compat_ptr(arg);
struct compat_ica_rsa_modexpo mex32;
struct ica_rsa_modexpo mex64;
+ struct zcrypt_track tr;
long rc;
+ memset(&tr, 0, sizeof(tr));
if (copy_from_user(&mex32, umex32, sizeof(mex32)))
return -EFAULT;
mex64.inputdata = compat_ptr(mex32.inputdata);
@@ -1547,13 +1733,17 @@ static long trans_modexpo32(struct ap_perms *perms, struct file *filp,
mex64.b_key = compat_ptr(mex32.b_key);
mex64.n_modulus = compat_ptr(mex32.n_modulus);
do {
- rc = zcrypt_rsa_modexpo(perms, &mex64);
- } while (rc == -EAGAIN);
+ rc = zcrypt_rsa_modexpo(perms, &tr, &mex64);
+ if (rc == -EAGAIN)
+ tr.again_counter++;
+ } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
/* on failure: retry once again after a requested rescan */
if ((rc == -ENODEV) && (zcrypt_process_rescan()))
do {
- rc = zcrypt_rsa_modexpo(perms, &mex64);
- } while (rc == -EAGAIN);
+ rc = zcrypt_rsa_modexpo(perms, &tr, &mex64);
+ if (rc == -EAGAIN)
+ tr.again_counter++;
+ } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
if (rc)
return rc;
return put_user(mex64.outputdatalength,
@@ -1578,8 +1768,10 @@ static long trans_modexpo_crt32(struct ap_perms *perms, struct file *filp,
struct compat_ica_rsa_modexpo_crt __user *ucrt32 = compat_ptr(arg);
struct compat_ica_rsa_modexpo_crt crt32;
struct ica_rsa_modexpo_crt crt64;
+ struct zcrypt_track tr;
long rc;
+ memset(&tr, 0, sizeof(tr));
if (copy_from_user(&crt32, ucrt32, sizeof(crt32)))
return -EFAULT;
crt64.inputdata = compat_ptr(crt32.inputdata);
@@ -1592,13 +1784,17 @@ static long trans_modexpo_crt32(struct ap_perms *perms, struct file *filp,
crt64.nq_prime = compat_ptr(crt32.nq_prime);
crt64.u_mult_inv = compat_ptr(crt32.u_mult_inv);
do {
- rc = zcrypt_rsa_crt(perms, &crt64);
- } while (rc == -EAGAIN);
+ rc = zcrypt_rsa_crt(perms, &tr, &crt64);
+ if (rc == -EAGAIN)
+ tr.again_counter++;
+ } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
/* on failure: retry once again after a requested rescan */
if ((rc == -ENODEV) && (zcrypt_process_rescan()))
do {
- rc = zcrypt_rsa_crt(perms, &crt64);
- } while (rc == -EAGAIN);
+ rc = zcrypt_rsa_crt(perms, &tr, &crt64);
+ if (rc == -EAGAIN)
+ tr.again_counter++;
+ } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
if (rc)
return rc;
return put_user(crt64.outputdatalength,
@@ -1630,9 +1826,11 @@ static long trans_xcRB32(struct ap_perms *perms, struct file *filp,
{
struct compat_ica_xcRB __user *uxcRB32 = compat_ptr(arg);
struct compat_ica_xcRB xcRB32;
+ struct zcrypt_track tr;
struct ica_xcRB xcRB64;
long rc;
+ memset(&tr, 0, sizeof(tr));
if (copy_from_user(&xcRB32, uxcRB32, sizeof(xcRB32)))
return -EFAULT;
xcRB64.agent_ID = xcRB32.agent_ID;
@@ -1656,13 +1854,17 @@ static long trans_xcRB32(struct ap_perms *perms, struct file *filp,
xcRB64.priority_window = xcRB32.priority_window;
xcRB64.status = xcRB32.status;
do {
- rc = _zcrypt_send_cprb(perms, &xcRB64);
- } while (rc == -EAGAIN);
+ rc = _zcrypt_send_cprb(true, perms, &tr, &xcRB64);
+ if (rc == -EAGAIN)
+ tr.again_counter++;
+ } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
/* on failure: retry once again after a requested rescan */
if ((rc == -ENODEV) && (zcrypt_process_rescan()))
do {
- rc = _zcrypt_send_cprb(perms, &xcRB64);
- } while (rc == -EAGAIN);
+ rc = _zcrypt_send_cprb(true, perms, &tr, &xcRB64);
+ if (rc == -EAGAIN)
+ tr.again_counter++;
+ } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
xcRB32.reply_control_blk_length = xcRB64.reply_control_blk_length;
xcRB32.reply_data_length = xcRB64.reply_data_length;
xcRB32.status = xcRB64.status;
diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h
index 599e68bf53f7..51c0b8bdef50 100644
--- a/drivers/s390/crypto/zcrypt_api.h
+++ b/drivers/s390/crypto/zcrypt_api.h
@@ -55,13 +55,30 @@ enum crypto_ops {
struct zcrypt_queue;
+/* struct to hold tracking information for a userspace request/response */
+struct zcrypt_track {
+ int again_counter; /* retry attempts counter */
+ int last_qid; /* last qid used */
+ int last_rc; /* last return code */
+#ifdef CONFIG_ZCRYPT_DEBUG
+ struct ap_fi fi; /* failure injection cmd */
+#endif
+};
+
+/* defines related to message tracking */
+#define TRACK_AGAIN_MAX 10
+#define TRACK_AGAIN_CARD_WEIGHT_PENALTY 1000
+#define TRACK_AGAIN_QUEUE_WEIGHT_PENALTY 10000
+
struct zcrypt_ops {
- long (*rsa_modexpo)(struct zcrypt_queue *, struct ica_rsa_modexpo *);
+ long (*rsa_modexpo)(struct zcrypt_queue *, struct ica_rsa_modexpo *,
+ struct ap_message *);
long (*rsa_modexpo_crt)(struct zcrypt_queue *,
- struct ica_rsa_modexpo_crt *);
- long (*send_cprb)(struct zcrypt_queue *, struct ica_xcRB *,
+ struct ica_rsa_modexpo_crt *,
+ struct ap_message *);
+ long (*send_cprb)(bool userspace, struct zcrypt_queue *, struct ica_xcRB *,
struct ap_message *);
- long (*send_ep11_cprb)(struct zcrypt_queue *, struct ep11_urb *,
+ long (*send_ep11_cprb)(bool userspace, struct zcrypt_queue *, struct ep11_urb *,
struct ap_message *);
long (*rng)(struct zcrypt_queue *, char *, struct ap_message *);
struct list_head list; /* zcrypt ops list. */
@@ -82,7 +99,7 @@ struct zcrypt_card {
int min_mod_size; /* Min number of bits. */
int max_mod_size; /* Max number of bits. */
int max_exp_bit_length;
- int speed_rating[NUM_OPS]; /* Speed idx of crypto ops. */
+ const int *speed_rating; /* Speed idx of crypto ops. */
atomic_t load; /* Utilization of the crypto device */
int request_count; /* # current requests. */
@@ -145,4 +162,26 @@ void zcrypt_device_status_mask_ext(struct zcrypt_device_status_ext *devstatus);
int zcrypt_device_status_ext(int card, int queue,
struct zcrypt_device_status_ext *devstatus);
+static inline unsigned long z_copy_from_user(bool userspace,
+ void *to,
+ const void __user *from,
+ unsigned long n)
+{
+ if (likely(userspace))
+ return copy_from_user(to, from, n);
+ memcpy(to, (void __force *) from, n);
+ return 0;
+}
+
+static inline unsigned long z_copy_to_user(bool userspace,
+ void __user *to,
+ const void *from,
+ unsigned long n)
+{
+ if (likely(userspace))
+ return copy_to_user(to, from, n);
+ memcpy((void __force *) to, from, n);
+ return 0;
+}
+
#endif /* _ZCRYPT_API_H_ */
diff --git a/drivers/s390/crypto/zcrypt_card.c b/drivers/s390/crypto/zcrypt_card.c
index c53cab4b0c9e..e342eb86acd1 100644
--- a/drivers/s390/crypto/zcrypt_card.c
+++ b/drivers/s390/crypto/zcrypt_card.c
@@ -50,22 +50,28 @@ static ssize_t online_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct zcrypt_card *zc = to_ap_card(dev)->private;
+ struct ap_card *ac = to_ap_card(dev);
+ struct zcrypt_card *zc = ac->private;
+ int online = ac->config && zc->online ? 1 : 0;
- return scnprintf(buf, PAGE_SIZE, "%d\n", zc->online);
+ return scnprintf(buf, PAGE_SIZE, "%d\n", online);
}
static ssize_t online_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
- struct zcrypt_card *zc = to_ap_card(dev)->private;
+ struct ap_card *ac = to_ap_card(dev);
+ struct zcrypt_card *zc = ac->private;
struct zcrypt_queue *zq;
int online, id;
if (sscanf(buf, "%d\n", &online) != 1 || online < 0 || online > 1)
return -EINVAL;
+ if (online && !ac->config)
+ return -ENODEV;
+
zc->online = online;
id = zc->card->id;
diff --git a/drivers/s390/crypto/zcrypt_ccamisc.c b/drivers/s390/crypto/zcrypt_ccamisc.c
index c793dcabd551..b1046811450f 100644
--- a/drivers/s390/crypto/zcrypt_ccamisc.c
+++ b/drivers/s390/crypto/zcrypt_ccamisc.c
@@ -173,6 +173,49 @@ int cca_check_secaescipherkey(debug_info_t *dbg, int dbflvl,
EXPORT_SYMBOL(cca_check_secaescipherkey);
/*
+ * Simple check if the token is a valid CCA secure ECC private
+ * key token. Returns 0 on success or errno value on failure.
+ */
+int cca_check_sececckeytoken(debug_info_t *dbg, int dbflvl,
+ const u8 *token, size_t keysize,
+ int checkcpacfexport)
+{
+ struct eccprivkeytoken *t = (struct eccprivkeytoken *) token;
+
+#define DBF(...) debug_sprintf_event(dbg, dbflvl, ##__VA_ARGS__)
+
+ if (t->type != TOKTYPE_CCA_INTERNAL_PKA) {
+ if (dbg)
+ DBF("%s token check failed, type 0x%02x != 0x%02x\n",
+ __func__, (int) t->type, TOKTYPE_CCA_INTERNAL_PKA);
+ return -EINVAL;
+ }
+ if (t->len > keysize) {
+ if (dbg)
+ DBF("%s token check failed, len %d > keysize %zu\n",
+ __func__, (int) t->len, keysize);
+ return -EINVAL;
+ }
+ if (t->secid != 0x20) {
+ if (dbg)
+ DBF("%s token check failed, secid 0x%02x != 0x20\n",
+ __func__, (int) t->secid);
+ return -EINVAL;
+ }
+ if (checkcpacfexport && !(t->kutc & 0x01)) {
+ if (dbg)
+ DBF("%s token check failed, XPRTCPAC bit is 0\n",
+ __func__);
+ return -EINVAL;
+ }
+
+#undef DBF
+
+ return 0;
+}
+EXPORT_SYMBOL(cca_check_sececckeytoken);
+
+/*
* Allocate consecutive memory for request CPRB, request param
* block, reply CPRB and reply param block and fill in values
* for the common fields. Returns 0 on success or errno value
@@ -249,24 +292,6 @@ static inline void prep_xcrb(struct ica_xcRB *pxcrb,
}
/*
- * Helper function which calls zcrypt_send_cprb with
- * memory management segment adjusted to kernel space
- * so that the copy_from_user called within this
- * function do in fact copy from kernel space.
- */
-static inline int _zcrypt_send_cprb(struct ica_xcRB *xcrb)
-{
- int rc;
- mm_segment_t old_fs = get_fs();
-
- set_fs(KERNEL_DS);
- rc = zcrypt_send_cprb(xcrb);
- set_fs(old_fs);
-
- return rc;
-}
-
-/*
* Generate (random) CCA AES DATA secure key.
*/
int cca_genseckey(u16 cardnr, u16 domain,
@@ -359,7 +384,7 @@ int cca_genseckey(u16 cardnr, u16 domain,
prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk);
/* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
- rc = _zcrypt_send_cprb(&xcrb);
+ rc = zcrypt_send_cprb(&xcrb);
if (rc) {
DEBUG_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, errno %d\n",
__func__, (int) cardnr, (int) domain, rc);
@@ -497,7 +522,7 @@ int cca_clr2seckey(u16 cardnr, u16 domain, u32 keybitsize,
prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk);
/* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
- rc = _zcrypt_send_cprb(&xcrb);
+ rc = zcrypt_send_cprb(&xcrb);
if (rc) {
DEBUG_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
__func__, (int) cardnr, (int) domain, rc);
@@ -624,7 +649,7 @@ int cca_sec2protkey(u16 cardnr, u16 domain,
prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk);
/* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
- rc = _zcrypt_send_cprb(&xcrb);
+ rc = zcrypt_send_cprb(&xcrb);
if (rc) {
DEBUG_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
__func__, (int) cardnr, (int) domain, rc);
@@ -850,7 +875,7 @@ int cca_gencipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags,
prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk);
/* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
- rc = _zcrypt_send_cprb(&xcrb);
+ rc = zcrypt_send_cprb(&xcrb);
if (rc) {
DEBUG_ERR(
"%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
@@ -1018,7 +1043,7 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain,
prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk);
/* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
- rc = _zcrypt_send_cprb(&xcrb);
+ rc = zcrypt_send_cprb(&xcrb);
if (rc) {
DEBUG_ERR(
"%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
@@ -1235,7 +1260,7 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey,
prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk);
/* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
- rc = _zcrypt_send_cprb(&xcrb);
+ rc = zcrypt_send_cprb(&xcrb);
if (rc) {
DEBUG_ERR(
"%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
@@ -1316,6 +1341,156 @@ out:
EXPORT_SYMBOL(cca_cipher2protkey);
/*
+ * Derive protected key from CCA ECC secure private key.
+ */
+int cca_ecc2protkey(u16 cardnr, u16 domain, const u8 *key,
+ u8 *protkey, u32 *protkeylen, u32 *protkeytype)
+{
+ int rc;
+ u8 *mem, *ptr;
+ struct CPRBX *preqcblk, *prepcblk;
+ struct ica_xcRB xcrb;
+ struct aureqparm {
+ u8 subfunc_code[2];
+ u16 rule_array_len;
+ u8 rule_array[8];
+ struct {
+ u16 len;
+ u16 tk_blob_len;
+ u16 tk_blob_tag;
+ u8 tk_blob[66];
+ } vud;
+ struct {
+ u16 len;
+ u16 cca_key_token_len;
+ u16 cca_key_token_flags;
+ u8 cca_key_token[0];
+ } kb;
+ } __packed * preqparm;
+ struct aurepparm {
+ u8 subfunc_code[2];
+ u16 rule_array_len;
+ struct {
+ u16 len;
+ u16 sublen;
+ u16 tag;
+ struct cpacfkeyblock {
+ u8 version; /* version of this struct */
+ u8 flags[2];
+ u8 algo;
+ u8 form;
+ u8 pad1[3];
+ u16 keylen;
+ u8 key[0]; /* the key (keylen bytes) */
+ u16 keyattrlen;
+ u8 keyattr[32];
+ u8 pad2[1];
+ u8 vptype;
+ u8 vp[32]; /* verification pattern */
+ } ckb;
+ } vud;
+ struct {
+ u16 len;
+ } kb;
+ } __packed * prepparm;
+ int keylen = ((struct eccprivkeytoken *)key)->len;
+
+ /* get already prepared memory for 2 cprbs with param block each */
+ rc = alloc_and_prep_cprbmem(PARMBSIZE, &mem, &preqcblk, &prepcblk);
+ if (rc)
+ return rc;
+
+ /* fill request cprb struct */
+ preqcblk->domain = domain;
+
+ /* fill request cprb param block with AU request */
+ preqparm = (struct aureqparm __force *) preqcblk->req_parmb;
+ memcpy(preqparm->subfunc_code, "AU", 2);
+ preqparm->rule_array_len =
+ sizeof(preqparm->rule_array_len)
+ + sizeof(preqparm->rule_array);
+ memcpy(preqparm->rule_array, "EXPT-SK ", 8);
+ /* vud, tk blob */
+ preqparm->vud.len = sizeof(preqparm->vud);
+ preqparm->vud.tk_blob_len = sizeof(preqparm->vud.tk_blob)
+ + 2 * sizeof(uint16_t);
+ preqparm->vud.tk_blob_tag = 0x00C2;
+ /* kb, cca token */
+ preqparm->kb.len = keylen + 3 * sizeof(uint16_t);
+ preqparm->kb.cca_key_token_len = keylen + 2 * sizeof(uint16_t);
+ memcpy(preqparm->kb.cca_key_token, key, keylen);
+ /* now fill length of param block into cprb */
+ preqcblk->req_parml = sizeof(struct aureqparm) + keylen;
+
+ /* fill xcrb struct */
+ prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk);
+
+ /* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
+ rc = zcrypt_send_cprb(&xcrb);
+ if (rc) {
+ DEBUG_ERR(
+ "%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
+ __func__, (int) cardnr, (int) domain, rc);
+ goto out;
+ }
+
+ /* check response returncode and reasoncode */
+ if (prepcblk->ccp_rtcode != 0) {
+ DEBUG_ERR(
+ "%s unwrap secure key failure, card response %d/%d\n",
+ __func__,
+ (int) prepcblk->ccp_rtcode,
+ (int) prepcblk->ccp_rscode);
+ rc = -EIO;
+ goto out;
+ }
+ if (prepcblk->ccp_rscode != 0) {
+ DEBUG_WARN(
+ "%s unwrap secure key warning, card response %d/%d\n",
+ __func__,
+ (int) prepcblk->ccp_rtcode,
+ (int) prepcblk->ccp_rscode);
+ }
+
+ /* process response cprb param block */
+ ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX);
+ prepcblk->rpl_parmb = (u8 __user *) ptr;
+ prepparm = (struct aurepparm *) ptr;
+
+ /* check the returned keyblock */
+ if (prepparm->vud.ckb.version != 0x02) {
+ DEBUG_ERR("%s reply param keyblock version mismatch 0x%02x != 0x02\n",
+ __func__, (int) prepparm->vud.ckb.version);
+ rc = -EIO;
+ goto out;
+ }
+ if (prepparm->vud.ckb.algo != 0x81) {
+ DEBUG_ERR(
+ "%s reply param keyblock algo mismatch 0x%02x != 0x81\n",
+ __func__, (int) prepparm->vud.ckb.algo);
+ rc = -EIO;
+ goto out;
+ }
+
+ /* copy the translated protected key */
+ if (prepparm->vud.ckb.keylen > *protkeylen) {
+ DEBUG_ERR("%s prot keylen mismatch %d > buffersize %u\n",
+ __func__, prepparm->vud.ckb.keylen, *protkeylen);
+ rc = -EIO;
+ goto out;
+ }
+ memcpy(protkey, prepparm->vud.ckb.key, prepparm->vud.ckb.keylen);
+ *protkeylen = prepparm->vud.ckb.keylen;
+ if (protkeytype)
+ *protkeytype = PKEY_KEYTYPE_ECC;
+
+out:
+ free_cprbmem(mem, PARMBSIZE, 0);
+ return rc;
+}
+EXPORT_SYMBOL(cca_ecc2protkey);
+
+/*
* query cryptographic facility from CCA adapter
*/
int cca_query_crypto_facility(u16 cardnr, u16 domain,
@@ -1366,7 +1541,7 @@ int cca_query_crypto_facility(u16 cardnr, u16 domain,
prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk);
/* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
- rc = _zcrypt_send_cprb(&xcrb);
+ rc = zcrypt_send_cprb(&xcrb);
if (rc) {
DEBUG_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
__func__, (int) cardnr, (int) domain, rc);
@@ -1524,21 +1699,38 @@ static int fetch_cca_info(u16 cardnr, u16 domain, struct cca_info *ci)
rarray, &rlen, varray, &vlen);
if (rc == 0 && rlen >= 10*8 && vlen >= 204) {
memcpy(ci->serial, rarray, 8);
- ci->new_mk_state = (char) rarray[7*8];
- ci->cur_mk_state = (char) rarray[8*8];
- ci->old_mk_state = (char) rarray[9*8];
- if (ci->old_mk_state == '2')
- memcpy(&ci->old_mkvp, varray + 172, 8);
- if (ci->cur_mk_state == '2')
- memcpy(&ci->cur_mkvp, varray + 184, 8);
- if (ci->new_mk_state == '3')
- memcpy(&ci->new_mkvp, varray + 196, 8);
- found = 1;
+ ci->new_aes_mk_state = (char) rarray[7*8];
+ ci->cur_aes_mk_state = (char) rarray[8*8];
+ ci->old_aes_mk_state = (char) rarray[9*8];
+ if (ci->old_aes_mk_state == '2')
+ memcpy(&ci->old_aes_mkvp, varray + 172, 8);
+ if (ci->cur_aes_mk_state == '2')
+ memcpy(&ci->cur_aes_mkvp, varray + 184, 8);
+ if (ci->new_aes_mk_state == '3')
+ memcpy(&ci->new_aes_mkvp, varray + 196, 8);
+ found++;
+ }
+ if (!found)
+ goto out;
+ rlen = vlen = PAGE_SIZE/2;
+ rc = cca_query_crypto_facility(cardnr, domain, "STATICSB",
+ rarray, &rlen, varray, &vlen);
+ if (rc == 0 && rlen >= 10*8 && vlen >= 240) {
+ ci->new_apka_mk_state = (char) rarray[7*8];
+ ci->cur_apka_mk_state = (char) rarray[8*8];
+ ci->old_apka_mk_state = (char) rarray[9*8];
+ if (ci->old_apka_mk_state == '2')
+ memcpy(&ci->old_apka_mkvp, varray + 208, 8);
+ if (ci->cur_apka_mk_state == '2')
+ memcpy(&ci->cur_apka_mkvp, varray + 220, 8);
+ if (ci->new_apka_mk_state == '3')
+ memcpy(&ci->new_apka_mkvp, varray + 232, 8);
+ found++;
}
+out:
free_page((unsigned long) pg);
-
- return found ? 0 : -ENOENT;
+ return found == 2 ? 0 : -ENOENT;
}
/*
@@ -1592,16 +1784,16 @@ static int findcard(u64 mkvp, u16 *pcardnr, u16 *pdomain,
/* enabled CCA card, check current mkvp from cache */
if (cca_info_cache_fetch(card, dom, &ci) == 0 &&
ci.hwtype >= minhwtype &&
- ci.cur_mk_state == '2' &&
- ci.cur_mkvp == mkvp) {
+ ci.cur_aes_mk_state == '2' &&
+ ci.cur_aes_mkvp == mkvp) {
if (!verify)
break;
/* verify: refresh card info */
if (fetch_cca_info(card, dom, &ci) == 0) {
cca_info_cache_update(card, dom, &ci);
if (ci.hwtype >= minhwtype &&
- ci.cur_mk_state == '2' &&
- ci.cur_mkvp == mkvp)
+ ci.cur_aes_mk_state == '2' &&
+ ci.cur_aes_mkvp == mkvp)
break;
}
}
@@ -1623,12 +1815,12 @@ static int findcard(u64 mkvp, u16 *pcardnr, u16 *pdomain,
if (fetch_cca_info(card, dom, &ci) == 0) {
cca_info_cache_update(card, dom, &ci);
if (ci.hwtype >= minhwtype &&
- ci.cur_mk_state == '2' &&
- ci.cur_mkvp == mkvp)
+ ci.cur_aes_mk_state == '2' &&
+ ci.cur_aes_mkvp == mkvp)
break;
if (ci.hwtype >= minhwtype &&
- ci.old_mk_state == '2' &&
- ci.old_mkvp == mkvp &&
+ ci.old_aes_mk_state == '2' &&
+ ci.old_aes_mkvp == mkvp &&
oi < 0)
oi = i;
}
@@ -1682,15 +1874,14 @@ int cca_findcard(const u8 *key, u16 *pcardnr, u16 *pdomain, int verify)
EXPORT_SYMBOL(cca_findcard);
int cca_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain,
- int minhwtype, u64 cur_mkvp, u64 old_mkvp, int verify)
+ int minhwtype, int mktype, u64 cur_mkvp, u64 old_mkvp,
+ int verify)
{
struct zcrypt_device_status_ext *device_status;
- int i, n, card, dom, curmatch, oldmatch, rc = 0;
+ u32 *_apqns = NULL, _nr_apqns = 0;
+ int i, card, dom, curmatch, oldmatch, rc = 0;
struct cca_info ci;
- *apqns = NULL;
- *nr_apqns = 0;
-
/* fetch status of all crypto cards */
device_status = kvmalloc_array(MAX_ZDEV_ENTRIES_EXT,
sizeof(struct zcrypt_device_status_ext),
@@ -1699,67 +1890,73 @@ int cca_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain,
return -ENOMEM;
zcrypt_device_status_mask_ext(device_status);
- /* loop two times: first gather eligible apqns, then store them */
- while (1) {
- n = 0;
- /* walk through all the crypto cards */
- for (i = 0; i < MAX_ZDEV_ENTRIES_EXT; i++) {
- card = AP_QID_CARD(device_status[i].qid);
- dom = AP_QID_QUEUE(device_status[i].qid);
- /* check online state */
- if (!device_status[i].online)
- continue;
- /* check for cca functions */
- if (!(device_status[i].functions & 0x04))
- continue;
- /* check cardnr */
- if (cardnr != 0xFFFF && card != cardnr)
- continue;
- /* check domain */
- if (domain != 0xFFFF && dom != domain)
- continue;
- /* get cca info on this apqn */
- if (cca_get_info(card, dom, &ci, verify))
- continue;
- /* current master key needs to be valid */
- if (ci.cur_mk_state != '2')
- continue;
- /* check min hardware type */
- if (minhwtype > 0 && minhwtype > ci.hwtype)
- continue;
- if (cur_mkvp || old_mkvp) {
- /* check mkvps */
- curmatch = oldmatch = 0;
- if (cur_mkvp && cur_mkvp == ci.cur_mkvp)
+ /* allocate 1k space for up to 256 apqns */
+ _apqns = kmalloc_array(256, sizeof(u32), GFP_KERNEL);
+ if (!_apqns) {
+ kvfree(device_status);
+ return -ENOMEM;
+ }
+
+ /* walk through all the crypto apqnss */
+ for (i = 0; i < MAX_ZDEV_ENTRIES_EXT; i++) {
+ card = AP_QID_CARD(device_status[i].qid);
+ dom = AP_QID_QUEUE(device_status[i].qid);
+ /* check online state */
+ if (!device_status[i].online)
+ continue;
+ /* check for cca functions */
+ if (!(device_status[i].functions & 0x04))
+ continue;
+ /* check cardnr */
+ if (cardnr != 0xFFFF && card != cardnr)
+ continue;
+ /* check domain */
+ if (domain != 0xFFFF && dom != domain)
+ continue;
+ /* get cca info on this apqn */
+ if (cca_get_info(card, dom, &ci, verify))
+ continue;
+ /* current master key needs to be valid */
+ if (mktype == AES_MK_SET && ci.cur_aes_mk_state != '2')
+ continue;
+ if (mktype == APKA_MK_SET && ci.cur_apka_mk_state != '2')
+ continue;
+ /* check min hardware type */
+ if (minhwtype > 0 && minhwtype > ci.hwtype)
+ continue;
+ if (cur_mkvp || old_mkvp) {
+ /* check mkvps */
+ curmatch = oldmatch = 0;
+ if (mktype == AES_MK_SET) {
+ if (cur_mkvp && cur_mkvp == ci.cur_aes_mkvp)
+ curmatch = 1;
+ if (old_mkvp && ci.old_aes_mk_state == '2' &&
+ old_mkvp == ci.old_aes_mkvp)
+ oldmatch = 1;
+ } else {
+ if (cur_mkvp && cur_mkvp == ci.cur_apka_mkvp)
curmatch = 1;
- if (old_mkvp && ci.old_mk_state == '2' &&
- old_mkvp == ci.old_mkvp)
+ if (old_mkvp && ci.old_apka_mk_state == '2' &&
+ old_mkvp == ci.old_apka_mkvp)
oldmatch = 1;
- if ((cur_mkvp || old_mkvp) &&
- (curmatch + oldmatch < 1))
- continue;
}
- /* apqn passed all filtering criterons */
- if (*apqns && n < *nr_apqns)
- (*apqns)[n] = (((u16)card) << 16) | ((u16) dom);
- n++;
- }
- /* loop 2nd time: array has been filled */
- if (*apqns)
- break;
- /* loop 1st time: have # of eligible apqns in n */
- if (!n) {
- rc = -ENODEV; /* no eligible apqns found */
- break;
- }
- *nr_apqns = n;
- /* allocate array to store n apqns into */
- *apqns = kmalloc_array(n, sizeof(u32), GFP_KERNEL);
- if (!*apqns) {
- rc = -ENOMEM;
- break;
+ if (curmatch + oldmatch < 1)
+ continue;
}
- verify = 0;
+ /* apqn passed all filtering criterons, add to the array */
+ if (_nr_apqns < 256)
+ _apqns[_nr_apqns++] = (((u16)card) << 16) | ((u16) dom);
+ }
+
+ /* nothing found ? */
+ if (!_nr_apqns) {
+ kfree(_apqns);
+ rc = -ENODEV;
+ } else {
+ /* no re-allocation, simple return the _apqns array */
+ *apqns = _apqns;
+ *nr_apqns = _nr_apqns;
+ rc = 0;
}
kvfree(device_status);
diff --git a/drivers/s390/crypto/zcrypt_ccamisc.h b/drivers/s390/crypto/zcrypt_ccamisc.h
index 8b7a641671c9..e7105443d5cb 100644
--- a/drivers/s390/crypto/zcrypt_ccamisc.h
+++ b/drivers/s390/crypto/zcrypt_ccamisc.h
@@ -14,8 +14,9 @@
#include <asm/pkey.h>
/* Key token types */
-#define TOKTYPE_NON_CCA 0x00 /* Non-CCA key token */
-#define TOKTYPE_CCA_INTERNAL 0x01 /* CCA internal key token */
+#define TOKTYPE_NON_CCA 0x00 /* Non-CCA key token */
+#define TOKTYPE_CCA_INTERNAL 0x01 /* CCA internal sym key token */
+#define TOKTYPE_CCA_INTERNAL_PKA 0x1f /* CCA internal asym key token */
/* For TOKTYPE_NON_CCA: */
#define TOKVER_PROTECTED_KEY 0x01 /* Protected key token */
@@ -93,6 +94,31 @@ struct cipherkeytoken {
u8 vdata[]; /* variable part data follows */
} __packed;
+/* inside view of an CCA secure ECC private key */
+struct eccprivkeytoken {
+ u8 type; /* 0x1f for internal asym key token */
+ u8 version; /* should be 0x00 */
+ u16 len; /* total key token length in bytes */
+ u8 res1[4];
+ u8 secid; /* 0x20 for ECC priv key section marker */
+ u8 secver; /* section version */
+ u16 seclen; /* section length */
+ u8 wtype; /* wrapping method, 0x00 clear, 0x01 AES */
+ u8 htype; /* hash method, 0x02 for SHA-256 */
+ u8 res2[2];
+ u8 kutc; /* key usage and translation control */
+ u8 ctype; /* curve type */
+ u8 kfs; /* key format and security */
+ u8 ksrc; /* key source */
+ u16 pbitlen; /* length of prime p in bits */
+ u16 ibmadlen; /* IBM associated data length in bytes */
+ u64 mkvp; /* master key verification pattern */
+ u8 opk[48]; /* encrypted object protection key data */
+ u16 adatalen; /* associated data length in bytes */
+ u16 fseclen; /* formated section length in bytes */
+ u8 more_data[]; /* more data follows */
+} __packed;
+
/* Some defines for the CCA AES cipherkeytoken kmf1 field */
#define KMF1_XPRT_SYM 0x8000
#define KMF1_XPRT_UASY 0x4000
@@ -123,6 +149,14 @@ int cca_check_secaescipherkey(debug_info_t *dbg, int dbflvl,
int checkcpacfexport);
/*
+ * Simple check if the token is a valid CCA secure ECC private
+ * key token. Returns 0 on success or errno value on failure.
+ */
+int cca_check_sececckeytoken(debug_info_t *dbg, int dbflvl,
+ const u8 *token, size_t keysize,
+ int checkcpacfexport);
+
+/*
* Generate (random) CCA AES DATA secure key.
*/
int cca_genseckey(u16 cardnr, u16 domain, u32 keybitsize, u8 *seckey);
@@ -159,6 +193,12 @@ int cca_clr2cipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags,
const u8 *clrkey, u8 *keybuf, size_t *keybufsize);
/*
+ * Derive proteced key from CCA ECC secure private key.
+ */
+int cca_ecc2protkey(u16 cardnr, u16 domain, const u8 *key,
+ u8 *protkey, u32 *protkeylen, u32 *protkeytype);
+
+/*
* Query cryptographic facility from CCA adapter
*/
int cca_query_crypto_facility(u16 cardnr, u16 domain,
@@ -186,6 +226,8 @@ int cca_findcard(const u8 *key, u16 *pcardnr, u16 *pdomain, int verify);
* - if verify is enabled and a cur_mkvp and/or old_mkvp
* value is given, then refetch the cca_info and make sure the current
* cur_mkvp or old_mkvp values of the apqn are used.
+ * The mktype determines which set of master keys to use:
+ * 0 = AES_MK_SET - AES MK set, 1 = APKA MK_SET - APKA MK set
* The array of apqn entries is allocated with kmalloc and returned in *apqns;
* the number of apqns stored into the list is returned in *nr_apqns. One apqn
* entry is simple a 32 bit value with 16 bit cardnr and 16 bit domain nr and
@@ -194,18 +236,28 @@ int cca_findcard(const u8 *key, u16 *pcardnr, u16 *pdomain, int verify);
* -ENODEV is returned.
*/
int cca_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain,
- int minhwtype, u64 cur_mkvp, u64 old_mkvp, int verify);
+ int minhwtype, int mktype, u64 cur_mkvp, u64 old_mkvp,
+ int verify);
+
+#define AES_MK_SET 0
+#define APKA_MK_SET 1
/* struct to hold info for each CCA queue */
struct cca_info {
- int hwtype; /* one of the defined AP_DEVICE_TYPE_* */
- char new_mk_state; /* '1' empty, '2' partially full, '3' full */
- char cur_mk_state; /* '1' invalid, '2' valid */
- char old_mk_state; /* '1' invalid, '2' valid */
- u64 new_mkvp; /* truncated sha256 hash of new master key */
- u64 cur_mkvp; /* truncated sha256 hash of current master key */
- u64 old_mkvp; /* truncated sha256 hash of old master key */
- char serial[9]; /* serial number string (8 ascii numbers + 0x00) */
+ int hwtype; /* one of the defined AP_DEVICE_TYPE_* */
+ char new_aes_mk_state; /* '1' empty, '2' partially full, '3' full */
+ char cur_aes_mk_state; /* '1' invalid, '2' valid */
+ char old_aes_mk_state; /* '1' invalid, '2' valid */
+ char new_apka_mk_state; /* '1' empty, '2' partially full, '3' full */
+ char cur_apka_mk_state; /* '1' invalid, '2' valid */
+ char old_apka_mk_state; /* '1' invalid, '2' valid */
+ u64 new_aes_mkvp; /* truncated sha256 of new aes master key */
+ u64 cur_aes_mkvp; /* truncated sha256 of current aes master key */
+ u64 old_aes_mkvp; /* truncated sha256 of old aes master key */
+ u64 new_apka_mkvp; /* truncated sha256 of new apka master key */
+ u64 cur_apka_mkvp; /* truncated sha256 of current apka mk */
+ u64 old_apka_mkvp; /* truncated sha256 of old apka mk */
+ char serial[9]; /* serial number (8 ascii numbers + 0x00) */
};
/*
diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c
index b447f3e9e4a2..226a5612e855 100644
--- a/drivers/s390/crypto/zcrypt_cex2a.c
+++ b/drivers/s390/crypto/zcrypt_cex2a.c
@@ -94,8 +94,7 @@ static int zcrypt_cex2a_card_probe(struct ap_device *ap_dev)
if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX2A) {
zc->min_mod_size = CEX2A_MIN_MOD_SIZE;
zc->max_mod_size = CEX2A_MAX_MOD_SIZE;
- memcpy(zc->speed_rating, CEX2A_SPEED_IDX,
- sizeof(CEX2A_SPEED_IDX));
+ zc->speed_rating = CEX2A_SPEED_IDX;
zc->max_exp_bit_length = CEX2A_MAX_MOD_SIZE;
zc->type_string = "CEX2A";
zc->user_space_type = ZCRYPT_CEX2A;
@@ -108,8 +107,7 @@ static int zcrypt_cex2a_card_probe(struct ap_device *ap_dev)
zc->max_mod_size = CEX3A_MAX_MOD_SIZE;
zc->max_exp_bit_length = CEX3A_MAX_MOD_SIZE;
}
- memcpy(zc->speed_rating, CEX3A_SPEED_IDX,
- sizeof(CEX3A_SPEED_IDX));
+ zc->speed_rating = CEX3A_SPEED_IDX;
zc->type_string = "CEX3A";
zc->user_space_type = ZCRYPT_CEX3A;
} else {
diff --git a/drivers/s390/crypto/zcrypt_cex2c.c b/drivers/s390/crypto/zcrypt_cex2c.c
index f00127a78bab..7a8cbdbe4408 100644
--- a/drivers/s390/crypto/zcrypt_cex2c.c
+++ b/drivers/s390/crypto/zcrypt_cex2c.c
@@ -109,26 +109,53 @@ static ssize_t cca_mkvps_show(struct device *dev,
AP_QID_QUEUE(zq->queue->qid),
&ci, zq->online);
- if (ci.new_mk_state >= '1' && ci.new_mk_state <= '3')
+ if (ci.new_aes_mk_state >= '1' && ci.new_aes_mk_state <= '3')
n = scnprintf(buf, PAGE_SIZE, "AES NEW: %s 0x%016llx\n",
- new_state[ci.new_mk_state - '1'], ci.new_mkvp);
+ new_state[ci.new_aes_mk_state - '1'],
+ ci.new_aes_mkvp);
else
n = scnprintf(buf, PAGE_SIZE, "AES NEW: - -\n");
- if (ci.cur_mk_state >= '1' && ci.cur_mk_state <= '2')
+ if (ci.cur_aes_mk_state >= '1' && ci.cur_aes_mk_state <= '2')
n += scnprintf(buf + n, PAGE_SIZE - n,
"AES CUR: %s 0x%016llx\n",
- cao_state[ci.cur_mk_state - '1'], ci.cur_mkvp);
+ cao_state[ci.cur_aes_mk_state - '1'],
+ ci.cur_aes_mkvp);
else
n += scnprintf(buf + n, PAGE_SIZE - n, "AES CUR: - -\n");
- if (ci.old_mk_state >= '1' && ci.old_mk_state <= '2')
+ if (ci.old_aes_mk_state >= '1' && ci.old_aes_mk_state <= '2')
n += scnprintf(buf + n, PAGE_SIZE - n,
"AES OLD: %s 0x%016llx\n",
- cao_state[ci.old_mk_state - '1'], ci.old_mkvp);
+ cao_state[ci.old_aes_mk_state - '1'],
+ ci.old_aes_mkvp);
else
n += scnprintf(buf + n, PAGE_SIZE - n, "AES OLD: - -\n");
+ if (ci.new_apka_mk_state >= '1' && ci.new_apka_mk_state <= '3')
+ n += scnprintf(buf + n, PAGE_SIZE - n,
+ "APKA NEW: %s 0x%016llx\n",
+ new_state[ci.new_apka_mk_state - '1'],
+ ci.new_apka_mkvp);
+ else
+ n += scnprintf(buf + n, PAGE_SIZE - n, "APKA NEW: - -\n");
+
+ if (ci.cur_apka_mk_state >= '1' && ci.cur_apka_mk_state <= '2')
+ n += scnprintf(buf + n, PAGE_SIZE - n,
+ "APKA CUR: %s 0x%016llx\n",
+ cao_state[ci.cur_apka_mk_state - '1'],
+ ci.cur_apka_mkvp);
+ else
+ n += scnprintf(buf + n, PAGE_SIZE - n, "APKA CUR: - -\n");
+
+ if (ci.old_apka_mk_state >= '1' && ci.old_apka_mk_state <= '2')
+ n += scnprintf(buf + n, PAGE_SIZE - n,
+ "APKA OLD: %s 0x%016llx\n",
+ cao_state[ci.old_apka_mk_state - '1'],
+ ci.old_apka_mkvp);
+ else
+ n += scnprintf(buf + n, PAGE_SIZE - n, "APKA OLD: - -\n");
+
return n;
}
@@ -239,8 +266,7 @@ static int zcrypt_cex2c_card_probe(struct ap_device *ap_dev)
case AP_DEVICE_TYPE_CEX2C:
zc->user_space_type = ZCRYPT_CEX2C;
zc->type_string = "CEX2C";
- memcpy(zc->speed_rating, CEX2C_SPEED_IDX,
- sizeof(CEX2C_SPEED_IDX));
+ zc->speed_rating = CEX2C_SPEED_IDX;
zc->min_mod_size = CEX2C_MIN_MOD_SIZE;
zc->max_mod_size = CEX2C_MAX_MOD_SIZE;
zc->max_exp_bit_length = CEX2C_MAX_MOD_SIZE;
@@ -248,8 +274,7 @@ static int zcrypt_cex2c_card_probe(struct ap_device *ap_dev)
case AP_DEVICE_TYPE_CEX3C:
zc->user_space_type = ZCRYPT_CEX3C;
zc->type_string = "CEX3C";
- memcpy(zc->speed_rating, CEX3C_SPEED_IDX,
- sizeof(CEX3C_SPEED_IDX));
+ zc->speed_rating = CEX3C_SPEED_IDX;
zc->min_mod_size = CEX3C_MIN_MOD_SIZE;
zc->max_mod_size = CEX3C_MAX_MOD_SIZE;
zc->max_exp_bit_length = CEX3C_MAX_MOD_SIZE;
diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c
index dc20d983e468..f5195bca1d85 100644
--- a/drivers/s390/crypto/zcrypt_cex4.c
+++ b/drivers/s390/crypto/zcrypt_cex4.c
@@ -121,26 +121,53 @@ static ssize_t cca_mkvps_show(struct device *dev,
AP_QID_QUEUE(zq->queue->qid),
&ci, zq->online);
- if (ci.new_mk_state >= '1' && ci.new_mk_state <= '3')
+ if (ci.new_aes_mk_state >= '1' && ci.new_aes_mk_state <= '3')
n = scnprintf(buf, PAGE_SIZE, "AES NEW: %s 0x%016llx\n",
- new_state[ci.new_mk_state - '1'], ci.new_mkvp);
+ new_state[ci.new_aes_mk_state - '1'],
+ ci.new_aes_mkvp);
else
n = scnprintf(buf, PAGE_SIZE, "AES NEW: - -\n");
- if (ci.cur_mk_state >= '1' && ci.cur_mk_state <= '2')
+ if (ci.cur_aes_mk_state >= '1' && ci.cur_aes_mk_state <= '2')
n += scnprintf(buf + n, PAGE_SIZE - n,
"AES CUR: %s 0x%016llx\n",
- cao_state[ci.cur_mk_state - '1'], ci.cur_mkvp);
+ cao_state[ci.cur_aes_mk_state - '1'],
+ ci.cur_aes_mkvp);
else
n += scnprintf(buf + n, PAGE_SIZE - n, "AES CUR: - -\n");
- if (ci.old_mk_state >= '1' && ci.old_mk_state <= '2')
+ if (ci.old_aes_mk_state >= '1' && ci.old_aes_mk_state <= '2')
n += scnprintf(buf + n, PAGE_SIZE - n,
"AES OLD: %s 0x%016llx\n",
- cao_state[ci.old_mk_state - '1'], ci.old_mkvp);
+ cao_state[ci.old_aes_mk_state - '1'],
+ ci.old_aes_mkvp);
else
n += scnprintf(buf + n, PAGE_SIZE - n, "AES OLD: - -\n");
+ if (ci.new_apka_mk_state >= '1' && ci.new_apka_mk_state <= '3')
+ n += scnprintf(buf + n, PAGE_SIZE - n,
+ "APKA NEW: %s 0x%016llx\n",
+ new_state[ci.new_apka_mk_state - '1'],
+ ci.new_apka_mkvp);
+ else
+ n += scnprintf(buf + n, PAGE_SIZE - n, "APKA NEW: - -\n");
+
+ if (ci.cur_apka_mk_state >= '1' && ci.cur_apka_mk_state <= '2')
+ n += scnprintf(buf + n, PAGE_SIZE - n,
+ "APKA CUR: %s 0x%016llx\n",
+ cao_state[ci.cur_apka_mk_state - '1'],
+ ci.cur_apka_mkvp);
+ else
+ n += scnprintf(buf + n, PAGE_SIZE - n, "APKA CUR: - -\n");
+
+ if (ci.old_apka_mk_state >= '1' && ci.old_apka_mk_state <= '2')
+ n += scnprintf(buf + n, PAGE_SIZE - n,
+ "APKA OLD: %s 0x%016llx\n",
+ cao_state[ci.old_apka_mk_state - '1'],
+ ci.old_apka_mkvp);
+ else
+ n += scnprintf(buf + n, PAGE_SIZE - n, "APKA OLD: - -\n");
+
return n;
}
@@ -382,31 +409,31 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev)
* Normalized speed ratings per crypto adapter
* MEX_1k, MEX_2k, MEX_4k, CRT_1k, CRT_2k, CRT_4k, RNG, SECKEY
*/
- static const int CEX4A_SPEED_IDX[] = {
+ static const int CEX4A_SPEED_IDX[NUM_OPS] = {
14, 19, 249, 42, 228, 1458, 0, 0};
- static const int CEX5A_SPEED_IDX[] = {
+ static const int CEX5A_SPEED_IDX[NUM_OPS] = {
8, 9, 20, 18, 66, 458, 0, 0};
- static const int CEX6A_SPEED_IDX[] = {
+ static const int CEX6A_SPEED_IDX[NUM_OPS] = {
6, 9, 20, 17, 65, 438, 0, 0};
- static const int CEX7A_SPEED_IDX[] = {
+ static const int CEX7A_SPEED_IDX[NUM_OPS] = {
6, 8, 17, 15, 54, 362, 0, 0};
- static const int CEX4C_SPEED_IDX[] = {
+ static const int CEX4C_SPEED_IDX[NUM_OPS] = {
59, 69, 308, 83, 278, 2204, 209, 40};
static const int CEX5C_SPEED_IDX[] = {
24, 31, 50, 37, 90, 479, 27, 10};
- static const int CEX6C_SPEED_IDX[] = {
+ static const int CEX6C_SPEED_IDX[NUM_OPS] = {
16, 20, 32, 27, 77, 455, 24, 9};
- static const int CEX7C_SPEED_IDX[] = {
+ static const int CEX7C_SPEED_IDX[NUM_OPS] = {
14, 16, 26, 23, 64, 376, 23, 8};
- static const int CEX4P_SPEED_IDX[] = {
+ static const int CEX4P_SPEED_IDX[NUM_OPS] = {
0, 0, 0, 0, 0, 0, 0, 50};
- static const int CEX5P_SPEED_IDX[] = {
+ static const int CEX5P_SPEED_IDX[NUM_OPS] = {
0, 0, 0, 0, 0, 0, 0, 10};
- static const int CEX6P_SPEED_IDX[] = {
+ static const int CEX6P_SPEED_IDX[NUM_OPS] = {
0, 0, 0, 0, 0, 0, 0, 9};
- static const int CEX7P_SPEED_IDX[] = {
+ static const int CEX7P_SPEED_IDX[NUM_OPS] = {
0, 0, 0, 0, 0, 0, 0, 8};
struct ap_card *ac = to_ap_card(&ap_dev->device);
@@ -422,26 +449,22 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev)
if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX4) {
zc->type_string = "CEX4A";
zc->user_space_type = ZCRYPT_CEX4;
- memcpy(zc->speed_rating, CEX4A_SPEED_IDX,
- sizeof(CEX4A_SPEED_IDX));
+ zc->speed_rating = CEX4A_SPEED_IDX;
} else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX5) {
zc->type_string = "CEX5A";
zc->user_space_type = ZCRYPT_CEX5;
- memcpy(zc->speed_rating, CEX5A_SPEED_IDX,
- sizeof(CEX5A_SPEED_IDX));
+ zc->speed_rating = CEX5A_SPEED_IDX;
} else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX6) {
zc->type_string = "CEX6A";
zc->user_space_type = ZCRYPT_CEX6;
- memcpy(zc->speed_rating, CEX6A_SPEED_IDX,
- sizeof(CEX6A_SPEED_IDX));
+ zc->speed_rating = CEX6A_SPEED_IDX;
} else {
zc->type_string = "CEX7A";
/* wrong user space type, just for compatibility
* with the ZCRYPT_STATUS_MASK ioctl.
*/
zc->user_space_type = ZCRYPT_CEX6;
- memcpy(zc->speed_rating, CEX7A_SPEED_IDX,
- sizeof(CEX7A_SPEED_IDX));
+ zc->speed_rating = CEX7A_SPEED_IDX;
}
zc->min_mod_size = CEX4A_MIN_MOD_SIZE;
if (ap_test_bit(&ac->functions, AP_FUNC_MEX4K) &&
@@ -461,32 +484,28 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev)
* just keep it for cca compatibility
*/
zc->user_space_type = ZCRYPT_CEX3C;
- memcpy(zc->speed_rating, CEX4C_SPEED_IDX,
- sizeof(CEX4C_SPEED_IDX));
+ zc->speed_rating = CEX4C_SPEED_IDX;
} else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX5) {
zc->type_string = "CEX5C";
/* wrong user space type, must be CEX5
* just keep it for cca compatibility
*/
zc->user_space_type = ZCRYPT_CEX3C;
- memcpy(zc->speed_rating, CEX5C_SPEED_IDX,
- sizeof(CEX5C_SPEED_IDX));
+ zc->speed_rating = CEX5C_SPEED_IDX;
} else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX6) {
zc->type_string = "CEX6C";
/* wrong user space type, must be CEX6
* just keep it for cca compatibility
*/
zc->user_space_type = ZCRYPT_CEX3C;
- memcpy(zc->speed_rating, CEX6C_SPEED_IDX,
- sizeof(CEX6C_SPEED_IDX));
+ zc->speed_rating = CEX6C_SPEED_IDX;
} else {
zc->type_string = "CEX7C";
/* wrong user space type, must be CEX7
* just keep it for cca compatibility
*/
zc->user_space_type = ZCRYPT_CEX3C;
- memcpy(zc->speed_rating, CEX7C_SPEED_IDX,
- sizeof(CEX7C_SPEED_IDX));
+ zc->speed_rating = CEX7C_SPEED_IDX;
}
zc->min_mod_size = CEX4C_MIN_MOD_SIZE;
zc->max_mod_size = CEX4C_MAX_MOD_SIZE;
@@ -495,26 +514,22 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev)
if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX4) {
zc->type_string = "CEX4P";
zc->user_space_type = ZCRYPT_CEX4;
- memcpy(zc->speed_rating, CEX4P_SPEED_IDX,
- sizeof(CEX4P_SPEED_IDX));
+ zc->speed_rating = CEX4P_SPEED_IDX;
} else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX5) {
zc->type_string = "CEX5P";
zc->user_space_type = ZCRYPT_CEX5;
- memcpy(zc->speed_rating, CEX5P_SPEED_IDX,
- sizeof(CEX5P_SPEED_IDX));
+ zc->speed_rating = CEX5P_SPEED_IDX;
} else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX6) {
zc->type_string = "CEX6P";
zc->user_space_type = ZCRYPT_CEX6;
- memcpy(zc->speed_rating, CEX6P_SPEED_IDX,
- sizeof(CEX6P_SPEED_IDX));
+ zc->speed_rating = CEX6P_SPEED_IDX;
} else {
zc->type_string = "CEX7P";
/* wrong user space type, just for compatibility
* with the ZCRYPT_STATUS_MASK ioctl.
*/
zc->user_space_type = ZCRYPT_CEX6;
- memcpy(zc->speed_rating, CEX7P_SPEED_IDX,
- sizeof(CEX7P_SPEED_IDX));
+ zc->speed_rating = CEX7P_SPEED_IDX;
}
zc->min_mod_size = CEX4C_MIN_MOD_SIZE;
zc->max_mod_size = CEX4C_MAX_MOD_SIZE;
diff --git a/drivers/s390/crypto/zcrypt_debug.h b/drivers/s390/crypto/zcrypt_debug.h
index 241dbb5f75bf..3225489a1c41 100644
--- a/drivers/s390/crypto/zcrypt_debug.h
+++ b/drivers/s390/crypto/zcrypt_debug.h
@@ -21,6 +21,14 @@
#define ZCRYPT_DBF(...) \
debug_sprintf_event(zcrypt_dbf_info, ##__VA_ARGS__)
+#define ZCRYPT_DBF_ERR(...) \
+ debug_sprintf_event(zcrypt_dbf_info, DBF_ERR, ##__VA_ARGS__)
+#define ZCRYPT_DBF_WARN(...) \
+ debug_sprintf_event(zcrypt_dbf_info, DBF_WARN, ##__VA_ARGS__)
+#define ZCRYPT_DBF_INFO(...) \
+ debug_sprintf_event(zcrypt_dbf_info, DBF_INFO, ##__VA_ARGS__)
+#define ZCRYPT_DBF_DBG(...) \
+ debug_sprintf_event(zcrypt_dbf_info, DBF_DEBUG, ##__VA_ARGS__)
extern debug_info_t *zcrypt_dbf_info;
diff --git a/drivers/s390/crypto/zcrypt_ep11misc.c b/drivers/s390/crypto/zcrypt_ep11misc.c
index 3c3d403abe92..9ce5a71da69b 100644
--- a/drivers/s390/crypto/zcrypt_ep11misc.c
+++ b/drivers/s390/crypto/zcrypt_ep11misc.c
@@ -15,6 +15,7 @@
#include <linux/random.h>
#include <asm/zcrypt.h>
#include <asm/pkey.h>
+#include <crypto/aes.h>
#include "ap_bus.h"
#include "zcrypt_api.h"
@@ -113,79 +114,199 @@ static void __exit card_cache_free(void)
}
/*
- * Simple check if the key blob is a valid EP11 secure AES key.
+ * Simple check if the key blob is a valid EP11 AES key blob with header.
*/
-int ep11_check_aeskeyblob(debug_info_t *dbg, int dbflvl,
- const u8 *key, int keybitsize,
- int checkcpacfexport)
+int ep11_check_aes_key_with_hdr(debug_info_t *dbg, int dbflvl,
+ const u8 *key, size_t keylen, int checkcpacfexp)
{
- struct ep11keyblob *kb = (struct ep11keyblob *) key;
+ struct ep11kblob_header *hdr = (struct ep11kblob_header *) key;
+ struct ep11keyblob *kb = (struct ep11keyblob *) (key + sizeof(*hdr));
#define DBF(...) debug_sprintf_event(dbg, dbflvl, ##__VA_ARGS__)
- if (kb->head.type != TOKTYPE_NON_CCA) {
+ if (keylen < sizeof(*hdr) + sizeof(*kb)) {
+ DBF("%s key check failed, keylen %zu < %zu\n",
+ __func__, keylen, sizeof(*hdr) + sizeof(*kb));
+ return -EINVAL;
+ }
+
+ if (hdr->type != TOKTYPE_NON_CCA) {
if (dbg)
DBF("%s key check failed, type 0x%02x != 0x%02x\n",
- __func__, (int) kb->head.type, TOKTYPE_NON_CCA);
+ __func__, (int) hdr->type, TOKTYPE_NON_CCA);
return -EINVAL;
}
- if (kb->head.version != TOKVER_EP11_AES) {
+ if (hdr->hver != 0x00) {
+ if (dbg)
+ DBF("%s key check failed, header version 0x%02x != 0x00\n",
+ __func__, (int) hdr->hver);
+ return -EINVAL;
+ }
+ if (hdr->version != TOKVER_EP11_AES_WITH_HEADER) {
if (dbg)
DBF("%s key check failed, version 0x%02x != 0x%02x\n",
- __func__, (int) kb->head.version, TOKVER_EP11_AES);
+ __func__, (int) hdr->version, TOKVER_EP11_AES_WITH_HEADER);
+ return -EINVAL;
+ }
+ if (hdr->len > keylen) {
+ if (dbg)
+ DBF("%s key check failed, header len %d keylen %zu mismatch\n",
+ __func__, (int) hdr->len, keylen);
+ return -EINVAL;
+ }
+ if (hdr->len < sizeof(*hdr) + sizeof(*kb)) {
+ if (dbg)
+ DBF("%s key check failed, header len %d < %zu\n",
+ __func__, (int) hdr->len, sizeof(*hdr) + sizeof(*kb));
return -EINVAL;
}
+
if (kb->version != EP11_STRUCT_MAGIC) {
if (dbg)
- DBF("%s key check failed, magic 0x%04x != 0x%04x\n",
+ DBF("%s key check failed, blob magic 0x%04x != 0x%04x\n",
__func__, (int) kb->version, EP11_STRUCT_MAGIC);
return -EINVAL;
}
- switch (kb->head.keybitlen) {
- case 128:
- case 192:
- case 256:
- break;
- default:
+ if (checkcpacfexp && !(kb->attr & EP11_BLOB_PKEY_EXTRACTABLE)) {
if (dbg)
- DBF("%s key check failed, keybitlen %d invalid\n",
- __func__, (int) kb->head.keybitlen);
+ DBF("%s key check failed, PKEY_EXTRACTABLE is off\n",
+ __func__);
return -EINVAL;
}
- if (keybitsize > 0 && keybitsize != (int) kb->head.keybitlen) {
- DBF("%s key check failed, keybitsize %d\n",
- __func__, keybitsize);
+
+#undef DBF
+
+ return 0;
+}
+EXPORT_SYMBOL(ep11_check_aes_key_with_hdr);
+
+/*
+ * Simple check if the key blob is a valid EP11 ECC key blob with header.
+ */
+int ep11_check_ecc_key_with_hdr(debug_info_t *dbg, int dbflvl,
+ const u8 *key, size_t keylen, int checkcpacfexp)
+{
+ struct ep11kblob_header *hdr = (struct ep11kblob_header *) key;
+ struct ep11keyblob *kb = (struct ep11keyblob *) (key + sizeof(*hdr));
+
+#define DBF(...) debug_sprintf_event(dbg, dbflvl, ##__VA_ARGS__)
+
+ if (keylen < sizeof(*hdr) + sizeof(*kb)) {
+ DBF("%s key check failed, keylen %zu < %zu\n",
+ __func__, keylen, sizeof(*hdr) + sizeof(*kb));
+ return -EINVAL;
+ }
+
+ if (hdr->type != TOKTYPE_NON_CCA) {
+ if (dbg)
+ DBF("%s key check failed, type 0x%02x != 0x%02x\n",
+ __func__, (int) hdr->type, TOKTYPE_NON_CCA);
+ return -EINVAL;
+ }
+ if (hdr->hver != 0x00) {
+ if (dbg)
+ DBF("%s key check failed, header version 0x%02x != 0x00\n",
+ __func__, (int) hdr->hver);
+ return -EINVAL;
+ }
+ if (hdr->version != TOKVER_EP11_ECC_WITH_HEADER) {
+ if (dbg)
+ DBF("%s key check failed, version 0x%02x != 0x%02x\n",
+ __func__, (int) hdr->version, TOKVER_EP11_ECC_WITH_HEADER);
return -EINVAL;
}
- if (checkcpacfexport && !(kb->attr & EP11_BLOB_PKEY_EXTRACTABLE)) {
+ if (hdr->len > keylen) {
if (dbg)
- DBF("%s key check failed, PKEY_EXTRACTABLE is 0\n",
+ DBF("%s key check failed, header len %d keylen %zu mismatch\n",
+ __func__, (int) hdr->len, keylen);
+ return -EINVAL;
+ }
+ if (hdr->len < sizeof(*hdr) + sizeof(*kb)) {
+ if (dbg)
+ DBF("%s key check failed, header len %d < %zu\n",
+ __func__, (int) hdr->len, sizeof(*hdr) + sizeof(*kb));
+ return -EINVAL;
+ }
+
+ if (kb->version != EP11_STRUCT_MAGIC) {
+ if (dbg)
+ DBF("%s key check failed, blob magic 0x%04x != 0x%04x\n",
+ __func__, (int) kb->version, EP11_STRUCT_MAGIC);
+ return -EINVAL;
+ }
+ if (checkcpacfexp && !(kb->attr & EP11_BLOB_PKEY_EXTRACTABLE)) {
+ if (dbg)
+ DBF("%s key check failed, PKEY_EXTRACTABLE is off\n",
__func__);
return -EINVAL;
}
+
#undef DBF
return 0;
}
-EXPORT_SYMBOL(ep11_check_aeskeyblob);
+EXPORT_SYMBOL(ep11_check_ecc_key_with_hdr);
/*
- * Helper function which calls zcrypt_send_ep11_cprb with
- * memory management segment adjusted to kernel space
- * so that the copy_from_user called within this
- * function do in fact copy from kernel space.
+ * Simple check if the key blob is a valid EP11 AES key blob with
+ * the header in the session field (old style EP11 AES key).
*/
-static inline int _zcrypt_send_ep11_cprb(struct ep11_urb *urb)
+int ep11_check_aes_key(debug_info_t *dbg, int dbflvl,
+ const u8 *key, size_t keylen, int checkcpacfexp)
{
- int rc;
- mm_segment_t old_fs = get_fs();
+ struct ep11keyblob *kb = (struct ep11keyblob *) key;
- set_fs(KERNEL_DS);
- rc = zcrypt_send_ep11_cprb(urb);
- set_fs(old_fs);
+#define DBF(...) debug_sprintf_event(dbg, dbflvl, ##__VA_ARGS__)
- return rc;
+ if (keylen < sizeof(*kb)) {
+ DBF("%s key check failed, keylen %zu < %zu\n",
+ __func__, keylen, sizeof(*kb));
+ return -EINVAL;
+ }
+
+ if (kb->head.type != TOKTYPE_NON_CCA) {
+ if (dbg)
+ DBF("%s key check failed, type 0x%02x != 0x%02x\n",
+ __func__, (int) kb->head.type, TOKTYPE_NON_CCA);
+ return -EINVAL;
+ }
+ if (kb->head.version != TOKVER_EP11_AES) {
+ if (dbg)
+ DBF("%s key check failed, version 0x%02x != 0x%02x\n",
+ __func__, (int) kb->head.version, TOKVER_EP11_AES);
+ return -EINVAL;
+ }
+ if (kb->head.len > keylen) {
+ if (dbg)
+ DBF("%s key check failed, header len %d keylen %zu mismatch\n",
+ __func__, (int) kb->head.len, keylen);
+ return -EINVAL;
+ }
+ if (kb->head.len < sizeof(*kb)) {
+ if (dbg)
+ DBF("%s key check failed, header len %d < %zu\n",
+ __func__, (int) kb->head.len, sizeof(*kb));
+ return -EINVAL;
+ }
+
+ if (kb->version != EP11_STRUCT_MAGIC) {
+ if (dbg)
+ DBF("%s key check failed, blob magic 0x%04x != 0x%04x\n",
+ __func__, (int) kb->version, EP11_STRUCT_MAGIC);
+ return -EINVAL;
+ }
+ if (checkcpacfexp && !(kb->attr & EP11_BLOB_PKEY_EXTRACTABLE)) {
+ if (dbg)
+ DBF("%s key check failed, PKEY_EXTRACTABLE is off\n",
+ __func__);
+ return -EINVAL;
+ }
+
+#undef DBF
+
+ return 0;
}
+EXPORT_SYMBOL(ep11_check_aes_key);
/*
* Allocate and prepare ep11 cprb plus additional payload.
@@ -399,7 +520,7 @@ static int ep11_query_info(u16 cardnr, u16 domain, u32 query_type,
req, sizeof(*req) + sizeof(*req_pl),
rep, sizeof(*rep) + sizeof(*rep_pl) + buflen);
- rc = _zcrypt_send_ep11_cprb(urb);
+ rc = zcrypt_send_ep11_cprb(urb);
if (rc) {
DEBUG_ERR(
"%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n",
@@ -637,7 +758,7 @@ int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
req, sizeof(*req) + sizeof(*req_pl),
rep, sizeof(*rep) + sizeof(*rep_pl));
- rc = _zcrypt_send_ep11_cprb(urb);
+ rc = zcrypt_send_ep11_cprb(urb);
if (rc) {
DEBUG_ERR(
"%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n",
@@ -757,7 +878,7 @@ static int ep11_cryptsingle(u16 card, u16 domain,
req, sizeof(*req) + req_pl_size,
rep, sizeof(*rep) + rep_pl_size);
- rc = _zcrypt_send_ep11_cprb(urb);
+ rc = zcrypt_send_ep11_cprb(urb);
if (rc) {
DEBUG_ERR(
"%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n",
@@ -905,7 +1026,7 @@ static int ep11_unwrapkey(u16 card, u16 domain,
req, sizeof(*req) + req_pl_size,
rep, sizeof(*rep) + sizeof(*rep_pl));
- rc = _zcrypt_send_ep11_cprb(urb);
+ rc = zcrypt_send_ep11_cprb(urb);
if (rc) {
DEBUG_ERR(
"%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n",
@@ -972,7 +1093,7 @@ static int ep11_wrapkey(u16 card, u16 domain,
u8 data_tag;
u8 data_lenfmt;
u16 data_len;
- u8 data[512];
+ u8 data[1024];
} __packed * rep_pl;
struct ep11_cprb *req = NULL, *rep = NULL;
struct ep11_target_dev target;
@@ -980,8 +1101,17 @@ static int ep11_wrapkey(u16 card, u16 domain,
struct ep11keyblob *kb;
size_t req_pl_size;
int api, rc = -ENOMEM;
+ bool has_header = false;
u8 *p;
+ /* maybe the session field holds a header with key info */
+ kb = (struct ep11keyblob *) key;
+ if (kb->head.type == TOKTYPE_NON_CCA &&
+ kb->head.version == TOKVER_EP11_AES) {
+ has_header = true;
+ keysize = kb->head.len < keysize ? kb->head.len : keysize;
+ }
+
/* request cprb and payload */
req_pl_size = sizeof(struct wk_req_pl) + (iv ? 16 : 0)
+ ASN1TAGLEN(keysize) + 4;
@@ -1007,9 +1137,10 @@ static int ep11_wrapkey(u16 card, u16 domain,
/* key blob */
p += asn1tag_write(p, 0x04, key, keysize);
/* maybe the key argument needs the head data cleaned out */
- kb = (struct ep11keyblob *)(p - keysize);
- if (kb->head.version == TOKVER_EP11_AES)
+ if (has_header) {
+ kb = (struct ep11keyblob *)(p - keysize);
memset(&kb->head, 0, sizeof(kb->head));
+ }
/* empty kek tag */
*p++ = 0x04;
*p++ = 0;
@@ -1033,7 +1164,7 @@ static int ep11_wrapkey(u16 card, u16 domain,
req, sizeof(*req) + req_pl_size,
rep, sizeof(*rep) + sizeof(*rep_pl));
- rc = _zcrypt_send_ep11_cprb(urb);
+ rc = zcrypt_send_ep11_cprb(urb);
if (rc) {
DEBUG_ERR(
"%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n",
@@ -1132,12 +1263,12 @@ out:
}
EXPORT_SYMBOL(ep11_clr2keyblob);
-int ep11_key2protkey(u16 card, u16 dom, const u8 *key, size_t keylen,
- u8 *protkey, u32 *protkeylen, u32 *protkeytype)
+int ep11_kblob2protkey(u16 card, u16 dom, const u8 *keyblob, size_t keybloblen,
+ u8 *protkey, u32 *protkeylen, u32 *protkeytype)
{
int rc = -EIO;
u8 *wkbuf = NULL;
- size_t wkbuflen = 256;
+ size_t wkbuflen, keylen;
struct wk_info {
u16 version;
u8 res1[16];
@@ -1147,8 +1278,33 @@ int ep11_key2protkey(u16 card, u16 dom, const u8 *key, size_t keylen,
u8 res2[8];
u8 pkey[0];
} __packed * wki;
+ const u8 *key;
+ struct ep11kblob_header *hdr;
+
+ /* key with or without header ? */
+ hdr = (struct ep11kblob_header *) keyblob;
+ if (hdr->type == TOKTYPE_NON_CCA
+ && (hdr->version == TOKVER_EP11_AES_WITH_HEADER
+ || hdr->version == TOKVER_EP11_ECC_WITH_HEADER)
+ && is_ep11_keyblob(keyblob + sizeof(struct ep11kblob_header))) {
+ /* EP11 AES or ECC key with header */
+ key = keyblob + sizeof(struct ep11kblob_header);
+ keylen = hdr->len - sizeof(struct ep11kblob_header);
+ } else if (hdr->type == TOKTYPE_NON_CCA
+ && hdr->version == TOKVER_EP11_AES
+ && is_ep11_keyblob(keyblob)) {
+ /* EP11 AES key (old style) */
+ key = keyblob;
+ keylen = hdr->len;
+ } else if (is_ep11_keyblob(keyblob)) {
+ /* raw EP11 key blob */
+ key = keyblob;
+ keylen = keybloblen;
+ } else
+ return -EINVAL;
/* alloc temp working buffer */
+ wkbuflen = (keylen + AES_BLOCK_SIZE) & (~(AES_BLOCK_SIZE - 1));
wkbuf = kmalloc(wkbuflen, GFP_ATOMIC);
if (!wkbuf)
return -ENOMEM;
@@ -1165,46 +1321,68 @@ int ep11_key2protkey(u16 card, u16 dom, const u8 *key, size_t keylen,
wki = (struct wk_info *) wkbuf;
/* check struct version and pkey type */
- if (wki->version != 1 || wki->pkeytype != 1) {
+ if (wki->version != 1 || wki->pkeytype < 1 || wki->pkeytype > 5) {
DEBUG_ERR("%s wk info version %d or pkeytype %d mismatch.\n",
__func__, (int) wki->version, (int) wki->pkeytype);
rc = -EIO;
goto out;
}
- /* copy the tanslated protected key */
- switch (wki->pkeysize) {
- case 16+32:
- /* AES 128 protected key */
- if (protkeytype)
- *protkeytype = PKEY_KEYTYPE_AES_128;
- break;
- case 24+32:
- /* AES 192 protected key */
- if (protkeytype)
- *protkeytype = PKEY_KEYTYPE_AES_192;
+ /* check protected key type field */
+ switch (wki->pkeytype) {
+ case 1: /* AES */
+ switch (wki->pkeysize) {
+ case 16+32:
+ /* AES 128 protected key */
+ if (protkeytype)
+ *protkeytype = PKEY_KEYTYPE_AES_128;
+ break;
+ case 24+32:
+ /* AES 192 protected key */
+ if (protkeytype)
+ *protkeytype = PKEY_KEYTYPE_AES_192;
+ break;
+ case 32+32:
+ /* AES 256 protected key */
+ if (protkeytype)
+ *protkeytype = PKEY_KEYTYPE_AES_256;
+ break;
+ default:
+ DEBUG_ERR("%s unknown/unsupported AES pkeysize %d\n",
+ __func__, (int) wki->pkeysize);
+ rc = -EIO;
+ goto out;
+ }
break;
- case 32+32:
- /* AES 256 protected key */
+ case 3: /* EC-P */
+ case 4: /* EC-ED */
+ case 5: /* EC-BP */
if (protkeytype)
- *protkeytype = PKEY_KEYTYPE_AES_256;
+ *protkeytype = PKEY_KEYTYPE_ECC;
break;
+ case 2: /* TDES */
default:
- DEBUG_ERR("%s unknown/unsupported pkeysize %d\n",
- __func__, (int) wki->pkeysize);
+ DEBUG_ERR("%s unknown/unsupported key type %d\n",
+ __func__, (int) wki->pkeytype);
rc = -EIO;
goto out;
}
+
+ /* copy the tanslated protected key */
+ if (wki->pkeysize > *protkeylen) {
+ DEBUG_ERR("%s wk info pkeysize %llu > protkeysize %u\n",
+ __func__, wki->pkeysize, *protkeylen);
+ rc = -EINVAL;
+ goto out;
+ }
memcpy(protkey, wki->pkey, wki->pkeysize);
- if (protkeylen)
- *protkeylen = (u32) wki->pkeysize;
- rc = 0;
+ *protkeylen = wki->pkeysize;
out:
kfree(wkbuf);
return rc;
}
-EXPORT_SYMBOL(ep11_key2protkey);
+EXPORT_SYMBOL(ep11_kblob2protkey);
int ep11_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain,
int minhwtype, int minapi, const u8 *wkvp)
diff --git a/drivers/s390/crypto/zcrypt_ep11misc.h b/drivers/s390/crypto/zcrypt_ep11misc.h
index e3ed5ed1de86..1e02b197c003 100644
--- a/drivers/s390/crypto/zcrypt_ep11misc.h
+++ b/drivers/s390/crypto/zcrypt_ep11misc.h
@@ -12,22 +12,28 @@
#include <asm/zcrypt.h>
#include <asm/pkey.h>
-#define TOKVER_EP11_AES 0x03 /* EP11 AES key blob */
-
#define EP11_API_V 4 /* highest known and supported EP11 API version */
-
#define EP11_STRUCT_MAGIC 0x1234
-#define EP11_BLOB_PKEY_EXTRACTABLE 0x200000
+#define EP11_BLOB_PKEY_EXTRACTABLE 0x00200000
+
+/*
+ * Internal used values for the version field of the key header.
+ * Should match to the enum pkey_key_type in pkey.h.
+ */
+#define TOKVER_EP11_AES 0x03 /* EP11 AES key blob (old style) */
+#define TOKVER_EP11_AES_WITH_HEADER 0x06 /* EP11 AES key blob with header */
+#define TOKVER_EP11_ECC_WITH_HEADER 0x07 /* EP11 ECC key blob with header */
/* inside view of an EP11 secure key blob */
struct ep11keyblob {
union {
u8 session[32];
+ /* only used for PKEY_TYPE_EP11: */
struct {
u8 type; /* 0x00 (TOKTYPE_NON_CCA) */
u8 res0; /* unused */
u16 len; /* total length in bytes of this blob */
- u8 version; /* 0x06 (TOKVER_EP11_AES) */
+ u8 version; /* 0x03 (TOKVER_EP11_AES) */
u8 res1; /* unused */
u16 keybitlen; /* clear key bit len, 0 for unknown */
} head;
@@ -41,16 +47,41 @@ struct ep11keyblob {
u8 mac[32];
} __packed;
+/* check ep11 key magic to find out if this is an ep11 key blob */
+static inline bool is_ep11_keyblob(const u8 *key)
+{
+ struct ep11keyblob *kb = (struct ep11keyblob *) key;
+
+ return (kb->version == EP11_STRUCT_MAGIC);
+}
+
+/*
+ * Simple check if the key blob is a valid EP11 AES key blob with header.
+ * If checkcpacfexport is enabled, the key is also checked for the
+ * attributes needed to export this key for CPACF use.
+ * Returns 0 on success or errno value on failure.
+ */
+int ep11_check_aes_key_with_hdr(debug_info_t *dbg, int dbflvl,
+ const u8 *key, size_t keylen, int checkcpacfexp);
+
/*
- * Simple check if the key blob is a valid EP11 secure AES key.
- * If keybitsize is given, the bitsize of the key is also checked.
+ * Simple check if the key blob is a valid EP11 ECC key blob with header.
* If checkcpacfexport is enabled, the key is also checked for the
* attributes needed to export this key for CPACF use.
* Returns 0 on success or errno value on failure.
*/
-int ep11_check_aeskeyblob(debug_info_t *dbg, int dbflvl,
- const u8 *key, int keybitsize,
- int checkcpacfexport);
+int ep11_check_ecc_key_with_hdr(debug_info_t *dbg, int dbflvl,
+ const u8 *key, size_t keylen, int checkcpacfexp);
+
+/*
+ * Simple check if the key blob is a valid EP11 AES key blob with
+ * the header in the session field (old style EP11 AES key).
+ * If checkcpacfexport is enabled, the key is also checked for the
+ * attributes needed to export this key for CPACF use.
+ * Returns 0 on success or errno value on failure.
+ */
+int ep11_check_aes_key(debug_info_t *dbg, int dbflvl,
+ const u8 *key, size_t keylen, int checkcpacfexp);
/* EP11 card info struct */
struct ep11_card_info {
@@ -92,12 +123,6 @@ int ep11_clr2keyblob(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags,
const u8 *clrkey, u8 *keybuf, size_t *keybufsize);
/*
- * Derive proteced key from EP11 AES secure key blob.
- */
-int ep11_key2protkey(u16 cardnr, u16 domain, const u8 *key, size_t keylen,
- u8 *protkey, u32 *protkeylen, u32 *protkeytype);
-
-/*
* Build a list of ep11 apqns meeting the following constrains:
* - apqn is online and is in fact an EP11 apqn
* - if cardnr is not FFFF only apqns with this cardnr
@@ -119,6 +144,12 @@ int ep11_key2protkey(u16 cardnr, u16 domain, const u8 *key, size_t keylen,
int ep11_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain,
int minhwtype, int minapi, const u8 *wkvp);
+/*
+ * Derive proteced key from EP11 key blob (AES and ECC keys).
+ */
+int ep11_kblob2protkey(u16 card, u16 dom, const u8 *key, size_t keylen,
+ u8 *protkey, u32 *protkeylen, u32 *protkeytype);
+
void zcrypt_ep11misc_exit(void);
#endif /* _ZCRYPT_EP11MISC_H_ */
diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h
index 54a04f8c38ef..39e626e3a379 100644
--- a/drivers/s390/crypto/zcrypt_error.h
+++ b/drivers/s390/crypto/zcrypt_error.h
@@ -52,7 +52,6 @@ struct error_hdr {
#define REP82_ERROR_INVALID_COMMAND 0x30
#define REP82_ERROR_MALFORMED_MSG 0x40
#define REP82_ERROR_INVALID_SPECIAL_CMD 0x41
-#define REP82_ERROR_INVALID_DOMAIN_PRECHECK 0x42
#define REP82_ERROR_RESERVED_FIELDO 0x50 /* old value */
#define REP82_ERROR_WORD_ALIGNMENT 0x60
#define REP82_ERROR_MESSAGE_LENGTH 0x80
@@ -67,7 +66,6 @@ struct error_hdr {
#define REP82_ERROR_ZERO_BUFFER_LEN 0xB0
#define REP88_ERROR_MODULE_FAILURE 0x10
-
#define REP88_ERROR_MESSAGE_TYPE 0x20
#define REP88_ERROR_MESSAGE_MALFORMD 0x22
#define REP88_ERROR_MESSAGE_LENGTH 0x23
@@ -85,78 +83,56 @@ static inline int convert_error(struct zcrypt_queue *zq,
int queue = AP_QID_QUEUE(zq->queue->qid);
switch (ehdr->reply_code) {
- case REP82_ERROR_OPERAND_INVALID:
- case REP82_ERROR_OPERAND_SIZE:
- case REP82_ERROR_EVEN_MOD_IN_OPND:
- case REP88_ERROR_MESSAGE_MALFORMD:
- case REP82_ERROR_INVALID_DOMAIN_PRECHECK:
- case REP82_ERROR_INVALID_DOMAIN_PENDING:
- case REP82_ERROR_INVALID_SPECIAL_CMD:
- case REP82_ERROR_FILTERED_BY_HYPERVISOR:
- // REP88_ERROR_INVALID_KEY // '82' CEX2A
- // REP88_ERROR_OPERAND // '84' CEX2A
- // REP88_ERROR_OPERAND_EVEN_MOD // '85' CEX2A
- /* Invalid input data. */
+ case REP82_ERROR_INVALID_MSG_LEN: /* 0x23 */
+ case REP82_ERROR_RESERVD_FIELD: /* 0x24 */
+ case REP82_ERROR_FORMAT_FIELD: /* 0x29 */
+ case REP82_ERROR_MALFORMED_MSG: /* 0x40 */
+ case REP82_ERROR_INVALID_SPECIAL_CMD: /* 0x41 */
+ case REP82_ERROR_MESSAGE_LENGTH: /* 0x80 */
+ case REP82_ERROR_OPERAND_INVALID: /* 0x82 */
+ case REP82_ERROR_OPERAND_SIZE: /* 0x84 */
+ case REP82_ERROR_EVEN_MOD_IN_OPND: /* 0x85 */
+ case REP82_ERROR_INVALID_DOMAIN_PENDING: /* 0x8A */
+ case REP82_ERROR_FILTERED_BY_HYPERVISOR: /* 0x8B */
+ case REP82_ERROR_PACKET_TRUNCATED: /* 0xA0 */
+ case REP88_ERROR_MESSAGE_MALFORMD: /* 0x22 */
+ case REP88_ERROR_KEY_TYPE: /* 0x34 */
+ /* RY indicates malformed request */
ZCRYPT_DBF(DBF_WARN,
- "device=%02x.%04x reply=0x%02x => rc=EINVAL\n",
+ "dev=%02x.%04x RY=0x%02x => rc=EINVAL\n",
card, queue, ehdr->reply_code);
return -EINVAL;
- case REP82_ERROR_MESSAGE_TYPE:
- // REP88_ERROR_MESSAGE_TYPE // '20' CEX2A
+ case REP82_ERROR_MACHINE_FAILURE: /* 0x10 */
+ case REP82_ERROR_MESSAGE_TYPE: /* 0x20 */
+ case REP82_ERROR_TRANSPORT_FAIL: /* 0x90 */
/*
- * To sent a message of the wrong type is a bug in the
- * device driver. Send error msg, disable the device
- * and then repeat the request.
+ * Msg to wrong type or card/infrastructure failure.
+ * Trigger rescan of the ap bus, trigger retry request.
*/
atomic_set(&zcrypt_rescan_req, 1);
- zq->online = 0;
- pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
- card, queue);
- ZCRYPT_DBF(DBF_ERR,
- "device=%02x.%04x reply=0x%02x => online=0 rc=EAGAIN\n",
- card, queue, ehdr->reply_code);
- return -EAGAIN;
- case REP82_ERROR_TRANSPORT_FAIL:
- /* Card or infrastructure failure, disable card */
- atomic_set(&zcrypt_rescan_req, 1);
- zq->online = 0;
- pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
- card, queue);
/* For type 86 response show the apfs value (failure reason) */
- if (ehdr->type == TYPE86_RSP_CODE) {
+ if (ehdr->reply_code == REP82_ERROR_TRANSPORT_FAIL &&
+ ehdr->type == TYPE86_RSP_CODE) {
struct {
struct type86_hdr hdr;
struct type86_fmt2_ext fmt2;
} __packed * head = reply->msg;
unsigned int apfs = *((u32 *)head->fmt2.apfs);
- ZCRYPT_DBF(DBF_ERR,
- "device=%02x.%04x reply=0x%02x apfs=0x%x => online=0 rc=EAGAIN\n",
- card, queue, apfs, ehdr->reply_code);
+ ZCRYPT_DBF(DBF_WARN,
+ "dev=%02x.%04x RY=0x%02x apfs=0x%x => bus rescan, rc=EAGAIN\n",
+ card, queue, ehdr->reply_code, apfs);
} else
- ZCRYPT_DBF(DBF_ERR,
- "device=%02x.%04x reply=0x%02x => online=0 rc=EAGAIN\n",
+ ZCRYPT_DBF(DBF_WARN,
+ "dev=%02x.%04x RY=0x%02x => bus rescan, rc=EAGAIN\n",
card, queue, ehdr->reply_code);
return -EAGAIN;
- case REP82_ERROR_MACHINE_FAILURE:
- // REP88_ERROR_MODULE_FAILURE // '10' CEX2A
- /* If a card fails disable it and repeat the request. */
- atomic_set(&zcrypt_rescan_req, 1);
- zq->online = 0;
- pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
- card, queue);
- ZCRYPT_DBF(DBF_ERR,
- "device=%02x.%04x reply=0x%02x => online=0 rc=EAGAIN\n",
- card, queue, ehdr->reply_code);
- return -EAGAIN;
default:
- zq->online = 0;
- pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
- card, queue);
- ZCRYPT_DBF(DBF_ERR,
- "device=%02x.%04x reply=0x%02x => online=0 rc=EAGAIN\n",
+ /* Assume request is valid and a retry will be worth it */
+ ZCRYPT_DBF(DBF_WARN,
+ "dev=%02x.%04x RY=0x%02x => rc=EAGAIN\n",
card, queue, ehdr->reply_code);
- return -EAGAIN; /* repeat the request on a different device. */
+ return -EAGAIN;
}
}
diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c
index 7aedc338b445..bf14ee445f89 100644
--- a/drivers/s390/crypto/zcrypt_msgtype50.c
+++ b/drivers/s390/crypto/zcrypt_msgtype50.c
@@ -246,6 +246,12 @@ static int ICAMEX_msg_to_type50MEX_msg(struct zcrypt_queue *zq,
copy_from_user(exp, mex->b_key, mod_len) ||
copy_from_user(inp, mex->inputdata, mod_len))
return -EFAULT;
+
+#ifdef CONFIG_ZCRYPT_DEBUG
+ if (ap_msg->fi.flags & AP_FI_FLAG_TOGGLE_SPECIAL)
+ ap_msg->flags ^= AP_MSG_FLAG_SPECIAL;
+#endif
+
return 0;
}
@@ -332,6 +338,11 @@ static int ICACRT_msg_to_type50CRT_msg(struct zcrypt_queue *zq,
copy_from_user(inp, crt->inputdata, mod_len))
return -EFAULT;
+#ifdef CONFIG_ZCRYPT_DEBUG
+ if (ap_msg->fi.flags & AP_FI_FLAG_TOGGLE_SPECIAL)
+ ap_msg->flags ^= AP_MSG_FLAG_SPECIAL;
+#endif
+
return 0;
}
@@ -356,15 +367,15 @@ static int convert_type80(struct zcrypt_queue *zq,
if (t80h->len < sizeof(*t80h) + outputdatalength) {
/* The result is too short, the CEXxA card may not do that.. */
zq->online = 0;
- pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
+ pr_err("Crypto dev=%02x.%04x code=0x%02x => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
- AP_QID_QUEUE(zq->queue->qid));
- ZCRYPT_DBF(DBF_ERR,
- "device=%02x.%04x code=0x%02x => online=0 rc=EAGAIN\n",
- AP_QID_CARD(zq->queue->qid),
- AP_QID_QUEUE(zq->queue->qid),
- t80h->code);
- return -EAGAIN; /* repeat the request on a different device. */
+ AP_QID_QUEUE(zq->queue->qid),
+ t80h->code);
+ ZCRYPT_DBF_ERR("dev=%02x.%04x code=0x%02x => online=0 rc=EAGAIN\n",
+ AP_QID_CARD(zq->queue->qid),
+ AP_QID_QUEUE(zq->queue->qid),
+ t80h->code);
+ return -EAGAIN;
}
if (zq->zcard->user_space_type == ZCRYPT_CEX2A)
BUG_ON(t80h->len > CEX2A_MAX_RESPONSE_SIZE);
@@ -376,10 +387,10 @@ static int convert_type80(struct zcrypt_queue *zq,
return 0;
}
-static int convert_response(struct zcrypt_queue *zq,
- struct ap_message *reply,
- char __user *outputdata,
- unsigned int outputdatalength)
+static int convert_response_cex2a(struct zcrypt_queue *zq,
+ struct ap_message *reply,
+ char __user *outputdata,
+ unsigned int outputdatalength)
{
/* Response type byte is the second byte in the response. */
unsigned char rtype = ((unsigned char *) reply->msg)[1];
@@ -393,15 +404,15 @@ static int convert_response(struct zcrypt_queue *zq,
outputdata, outputdatalength);
default: /* Unknown response type, this should NEVER EVER happen */
zq->online = 0;
- pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
+ pr_err("Crypto dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
- AP_QID_QUEUE(zq->queue->qid));
- ZCRYPT_DBF(DBF_ERR,
- "device=%02x.%04x rtype=0x%02x => online=0 rc=EAGAIN\n",
- AP_QID_CARD(zq->queue->qid),
- AP_QID_QUEUE(zq->queue->qid),
- (unsigned int) rtype);
- return -EAGAIN; /* repeat the request on a different device. */
+ AP_QID_QUEUE(zq->queue->qid),
+ (int) rtype);
+ ZCRYPT_DBF_ERR("dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
+ AP_QID_CARD(zq->queue->qid),
+ AP_QID_QUEUE(zq->queue->qid),
+ (int) rtype);
+ return -EAGAIN;
}
}
@@ -450,39 +461,41 @@ static atomic_t zcrypt_step = ATOMIC_INIT(0);
* @mex: pointer to the modexpo request buffer
*/
static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq,
- struct ica_rsa_modexpo *mex)
+ struct ica_rsa_modexpo *mex,
+ struct ap_message *ap_msg)
{
- struct ap_message ap_msg;
struct completion work;
int rc;
- ap_init_message(&ap_msg);
if (zq->zcard->user_space_type == ZCRYPT_CEX2A)
- ap_msg.msg = kmalloc(MSGTYPE50_CRB2_MAX_MSG_SIZE, GFP_KERNEL);
+ ap_msg->msg = kmalloc(MSGTYPE50_CRB2_MAX_MSG_SIZE, GFP_KERNEL);
else
- ap_msg.msg = kmalloc(MSGTYPE50_CRB3_MAX_MSG_SIZE, GFP_KERNEL);
- if (!ap_msg.msg)
+ ap_msg->msg = kmalloc(MSGTYPE50_CRB3_MAX_MSG_SIZE, GFP_KERNEL);
+ if (!ap_msg->msg)
return -ENOMEM;
- ap_msg.receive = zcrypt_cex2a_receive;
- ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
- atomic_inc_return(&zcrypt_step);
- ap_msg.private = &work;
- rc = ICAMEX_msg_to_type50MEX_msg(zq, &ap_msg, mex);
+ ap_msg->receive = zcrypt_cex2a_receive;
+ ap_msg->psmid = (((unsigned long long) current->pid) << 32) +
+ atomic_inc_return(&zcrypt_step);
+ ap_msg->private = &work;
+ rc = ICAMEX_msg_to_type50MEX_msg(zq, ap_msg, mex);
if (rc)
- goto out_free;
+ goto out;
init_completion(&work);
- ap_queue_message(zq->queue, &ap_msg);
+ rc = ap_queue_message(zq->queue, ap_msg);
+ if (rc)
+ goto out;
rc = wait_for_completion_interruptible(&work);
if (rc == 0) {
- rc = ap_msg.rc;
+ rc = ap_msg->rc;
if (rc == 0)
- rc = convert_response(zq, &ap_msg, mex->outputdata,
- mex->outputdatalength);
+ rc = convert_response_cex2a(zq, ap_msg,
+ mex->outputdata,
+ mex->outputdatalength);
} else
/* Signal pending. */
- ap_cancel_message(zq->queue, &ap_msg);
-out_free:
- kfree(ap_msg.msg);
+ ap_cancel_message(zq->queue, ap_msg);
+out:
+ ap_msg->private = NULL;
return rc;
}
@@ -494,39 +507,41 @@ out_free:
* @crt: pointer to the modexpoc_crt request buffer
*/
static long zcrypt_cex2a_modexpo_crt(struct zcrypt_queue *zq,
- struct ica_rsa_modexpo_crt *crt)
+ struct ica_rsa_modexpo_crt *crt,
+ struct ap_message *ap_msg)
{
- struct ap_message ap_msg;
struct completion work;
int rc;
- ap_init_message(&ap_msg);
if (zq->zcard->user_space_type == ZCRYPT_CEX2A)
- ap_msg.msg = kmalloc(MSGTYPE50_CRB2_MAX_MSG_SIZE, GFP_KERNEL);
+ ap_msg->msg = kmalloc(MSGTYPE50_CRB2_MAX_MSG_SIZE, GFP_KERNEL);
else
- ap_msg.msg = kmalloc(MSGTYPE50_CRB3_MAX_MSG_SIZE, GFP_KERNEL);
- if (!ap_msg.msg)
+ ap_msg->msg = kmalloc(MSGTYPE50_CRB3_MAX_MSG_SIZE, GFP_KERNEL);
+ if (!ap_msg->msg)
return -ENOMEM;
- ap_msg.receive = zcrypt_cex2a_receive;
- ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
- atomic_inc_return(&zcrypt_step);
- ap_msg.private = &work;
- rc = ICACRT_msg_to_type50CRT_msg(zq, &ap_msg, crt);
+ ap_msg->receive = zcrypt_cex2a_receive;
+ ap_msg->psmid = (((unsigned long long) current->pid) << 32) +
+ atomic_inc_return(&zcrypt_step);
+ ap_msg->private = &work;
+ rc = ICACRT_msg_to_type50CRT_msg(zq, ap_msg, crt);
if (rc)
- goto out_free;
+ goto out;
init_completion(&work);
- ap_queue_message(zq->queue, &ap_msg);
+ rc = ap_queue_message(zq->queue, ap_msg);
+ if (rc)
+ goto out;
rc = wait_for_completion_interruptible(&work);
if (rc == 0) {
- rc = ap_msg.rc;
+ rc = ap_msg->rc;
if (rc == 0)
- rc = convert_response(zq, &ap_msg, crt->outputdata,
- crt->outputdatalength);
+ rc = convert_response_cex2a(zq, ap_msg,
+ crt->outputdata,
+ crt->outputdatalength);
} else
/* Signal pending. */
- ap_cancel_message(zq->queue, &ap_msg);
-out_free:
- kfree(ap_msg.msg);
+ ap_cancel_message(zq->queue, ap_msg);
+out:
+ ap_msg->private = NULL;
return rc;
}
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c
index d77991c74c25..307f90657d1d 100644
--- a/drivers/s390/crypto/zcrypt_msgtype6.c
+++ b/drivers/s390/crypto/zcrypt_msgtype6.c
@@ -388,7 +388,7 @@ struct type86_fmt2_msg {
struct type86_fmt2_ext fmt2;
} __packed;
-static int XCRB_msg_to_type6CPRB_msgX(struct ap_message *ap_msg,
+static int XCRB_msg_to_type6CPRB_msgX(bool userspace, struct ap_message *ap_msg,
struct ica_xcRB *xcRB,
unsigned int *fcode,
unsigned short **dom)
@@ -465,8 +465,8 @@ static int XCRB_msg_to_type6CPRB_msgX(struct ap_message *ap_msg,
msg->hdr.FromCardLen2 = xcRB->reply_data_length;
/* prepare CPRB */
- if (copy_from_user(&(msg->cprbx), xcRB->request_control_blk_addr,
- xcRB->request_control_blk_length))
+ if (z_copy_from_user(userspace, &(msg->cprbx), xcRB->request_control_blk_addr,
+ xcRB->request_control_blk_length))
return -EFAULT;
if (msg->cprbx.cprb_len + sizeof(msg->hdr.function_code) >
xcRB->request_control_blk_length)
@@ -482,18 +482,23 @@ static int XCRB_msg_to_type6CPRB_msgX(struct ap_message *ap_msg,
|| memcmp(function_code, "AU", 2) == 0)
ap_msg->flags |= AP_MSG_FLAG_SPECIAL;
+#ifdef CONFIG_ZCRYPT_DEBUG
+ if (ap_msg->fi.flags & AP_FI_FLAG_TOGGLE_SPECIAL)
+ ap_msg->flags ^= AP_MSG_FLAG_SPECIAL;
+#endif
+
/* copy data block */
if (xcRB->request_data_length &&
- copy_from_user(req_data, xcRB->request_data_address,
- xcRB->request_data_length))
+ z_copy_from_user(userspace, req_data, xcRB->request_data_address,
+ xcRB->request_data_length))
return -EFAULT;
return 0;
}
-static int xcrb_msg_to_type6_ep11cprb_msgx(struct ap_message *ap_msg,
- struct ep11_urb *xcRB,
- unsigned int *fcode)
+static int xcrb_msg_to_type6_ep11cprb_msgx(bool userspace, struct ap_message *ap_msg,
+ struct ep11_urb *xcRB,
+ unsigned int *fcode)
{
unsigned int lfmt;
static struct type6_hdr static_type6_ep11_hdr = {
@@ -543,8 +548,8 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(struct ap_message *ap_msg,
msg->hdr.FromCardLen1 = xcRB->resp_len;
/* Import CPRB data from the ioctl input parameter */
- if (copy_from_user(&(msg->cprbx.cprb_len),
- (char __force __user *)xcRB->req, xcRB->req_len)) {
+ if (z_copy_from_user(userspace, &(msg->cprbx.cprb_len),
+ (char __force __user *)xcRB->req, xcRB->req_len)) {
return -EFAULT;
}
@@ -569,6 +574,11 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(struct ap_message *ap_msg,
if (msg->cprbx.flags & 0x20)
ap_msg->flags |= AP_MSG_FLAG_SPECIAL;
+#ifdef CONFIG_ZCRYPT_DEBUG
+ if (ap_msg->fi.flags & AP_FI_FLAG_TOGGLE_SPECIAL)
+ ap_msg->flags ^= AP_MSG_FLAG_SPECIAL;
+#endif
+
return 0;
}
@@ -650,23 +660,22 @@ static int convert_type86_ica(struct zcrypt_queue *zq,
(service_rc == 8 && service_rs == 72) ||
(service_rc == 8 && service_rs == 770) ||
(service_rc == 12 && service_rs == 769)) {
- ZCRYPT_DBF(DBF_DEBUG,
- "device=%02x.%04x rc/rs=%d/%d => rc=EINVAL\n",
- AP_QID_CARD(zq->queue->qid),
- AP_QID_QUEUE(zq->queue->qid),
- (int) service_rc, (int) service_rs);
+ ZCRYPT_DBF_WARN("dev=%02x.%04x rc/rs=%d/%d => rc=EINVAL\n",
+ AP_QID_CARD(zq->queue->qid),
+ AP_QID_QUEUE(zq->queue->qid),
+ (int) service_rc, (int) service_rs);
return -EINVAL;
}
zq->online = 0;
- pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
+ pr_err("Crypto dev=%02x.%04x rc/rs=%d/%d online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
- AP_QID_QUEUE(zq->queue->qid));
- ZCRYPT_DBF(DBF_ERR,
- "device=%02x.%04x rc/rs=%d/%d => online=0 rc=EAGAIN\n",
- AP_QID_CARD(zq->queue->qid),
- AP_QID_QUEUE(zq->queue->qid),
- (int) service_rc, (int) service_rs);
- return -EAGAIN; /* repeat the request on a different device. */
+ AP_QID_QUEUE(zq->queue->qid),
+ (int) service_rc, (int) service_rs);
+ ZCRYPT_DBF_ERR("dev=%02x.%04x rc/rs=%d/%d => online=0 rc=EAGAIN\n",
+ AP_QID_CARD(zq->queue->qid),
+ AP_QID_QUEUE(zq->queue->qid),
+ (int) service_rc, (int) service_rs);
+ return -EAGAIN;
}
data = msg->text;
reply_len = msg->length - 2;
@@ -707,7 +716,7 @@ static int convert_type86_ica(struct zcrypt_queue *zq,
*
* Returns 0 on success or -EINVAL, -EFAULT, -EAGAIN in case of an error.
*/
-static int convert_type86_xcrb(struct zcrypt_queue *zq,
+static int convert_type86_xcrb(bool userspace, struct zcrypt_queue *zq,
struct ap_message *reply,
struct ica_xcRB *xcRB)
{
@@ -715,15 +724,15 @@ static int convert_type86_xcrb(struct zcrypt_queue *zq,
char *data = reply->msg;
/* Copy CPRB to user */
- if (copy_to_user(xcRB->reply_control_blk_addr,
- data + msg->fmt2.offset1, msg->fmt2.count1))
+ if (z_copy_to_user(userspace, xcRB->reply_control_blk_addr,
+ data + msg->fmt2.offset1, msg->fmt2.count1))
return -EFAULT;
xcRB->reply_control_blk_length = msg->fmt2.count1;
/* Copy data buffer to user */
if (msg->fmt2.count2)
- if (copy_to_user(xcRB->reply_data_addr,
- data + msg->fmt2.offset2, msg->fmt2.count2))
+ if (z_copy_to_user(userspace, xcRB->reply_data_addr,
+ data + msg->fmt2.offset2, msg->fmt2.count2))
return -EFAULT;
xcRB->reply_data_length = msg->fmt2.count2;
return 0;
@@ -738,7 +747,7 @@ static int convert_type86_xcrb(struct zcrypt_queue *zq,
*
* Returns 0 on success or -EINVAL, -EFAULT, -EAGAIN in case of an error.
*/
-static int convert_type86_ep11_xcrb(struct zcrypt_queue *zq,
+static int convert_type86_ep11_xcrb(bool userspace, struct zcrypt_queue *zq,
struct ap_message *reply,
struct ep11_urb *xcRB)
{
@@ -749,8 +758,8 @@ static int convert_type86_ep11_xcrb(struct zcrypt_queue *zq,
return -EINVAL;
/* Copy response CPRB to user */
- if (copy_to_user((char __force __user *)xcRB->resp,
- data + msg->fmt2.offset1, msg->fmt2.count1))
+ if (z_copy_to_user(userspace, (char __force __user *)xcRB->resp,
+ data + msg->fmt2.offset1, msg->fmt2.count1))
return -EFAULT;
xcRB->resp_len = msg->fmt2.count1;
return 0;
@@ -800,23 +809,24 @@ static int convert_response_ica(struct zcrypt_queue *zq,
return convert_type86_ica(zq, reply,
outputdata, outputdatalength);
fallthrough; /* wrong cprb version is an unknown response */
- default: /* Unknown response type, this should NEVER EVER happen */
+ default:
+ /* Unknown response type, this should NEVER EVER happen */
zq->online = 0;
- pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
+ pr_err("Crypto dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
- AP_QID_QUEUE(zq->queue->qid));
- ZCRYPT_DBF(DBF_ERR,
- "device=%02x.%04x rtype=0x%02x => online=0 rc=EAGAIN\n",
- AP_QID_CARD(zq->queue->qid),
- AP_QID_QUEUE(zq->queue->qid),
- (int) msg->hdr.type);
- return -EAGAIN; /* repeat the request on a different device. */
+ AP_QID_QUEUE(zq->queue->qid),
+ (int) msg->hdr.type);
+ ZCRYPT_DBF_ERR("dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
+ AP_QID_CARD(zq->queue->qid),
+ AP_QID_QUEUE(zq->queue->qid),
+ (int) msg->hdr.type);
+ return -EAGAIN;
}
}
-static int convert_response_xcrb(struct zcrypt_queue *zq,
- struct ap_message *reply,
- struct ica_xcRB *xcRB)
+static int convert_response_xcrb(bool userspace, struct zcrypt_queue *zq,
+ struct ap_message *reply,
+ struct ica_xcRB *xcRB)
{
struct type86x_reply *msg = reply->msg;
@@ -831,25 +841,25 @@ static int convert_response_xcrb(struct zcrypt_queue *zq,
return convert_error(zq, reply);
}
if (msg->cprbx.cprb_ver_id == 0x02)
- return convert_type86_xcrb(zq, reply, xcRB);
+ return convert_type86_xcrb(userspace, zq, reply, xcRB);
fallthrough; /* wrong cprb version is an unknown response */
default: /* Unknown response type, this should NEVER EVER happen */
xcRB->status = 0x0008044DL; /* HDD_InvalidParm */
zq->online = 0;
- pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
+ pr_err("Crypto dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
- AP_QID_QUEUE(zq->queue->qid));
- ZCRYPT_DBF(DBF_ERR,
- "device=%02x.%04x rtype=0x%02x => online=0 rc=EAGAIN\n",
- AP_QID_CARD(zq->queue->qid),
- AP_QID_QUEUE(zq->queue->qid),
- (int) msg->hdr.type);
- return -EAGAIN; /* repeat the request on a different device. */
+ AP_QID_QUEUE(zq->queue->qid),
+ (int) msg->hdr.type);
+ ZCRYPT_DBF_ERR("dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
+ AP_QID_CARD(zq->queue->qid),
+ AP_QID_QUEUE(zq->queue->qid),
+ (int) msg->hdr.type);
+ return -EAGAIN;
}
}
-static int convert_response_ep11_xcrb(struct zcrypt_queue *zq,
- struct ap_message *reply, struct ep11_urb *xcRB)
+static int convert_response_ep11_xcrb(bool userspace, struct zcrypt_queue *zq,
+ struct ap_message *reply, struct ep11_urb *xcRB)
{
struct type86_ep11_reply *msg = reply->msg;
@@ -861,19 +871,19 @@ static int convert_response_ep11_xcrb(struct zcrypt_queue *zq,
if (msg->hdr.reply_code)
return convert_error(zq, reply);
if (msg->cprbx.cprb_ver_id == 0x04)
- return convert_type86_ep11_xcrb(zq, reply, xcRB);
+ return convert_type86_ep11_xcrb(userspace, zq, reply, xcRB);
fallthrough; /* wrong cprb version is an unknown resp */
default: /* Unknown response type, this should NEVER EVER happen */
zq->online = 0;
- pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
+ pr_err("Crypto dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
- AP_QID_QUEUE(zq->queue->qid));
- ZCRYPT_DBF(DBF_ERR,
- "device=%02x.%04x rtype=0x%02x => online=0 rc=EAGAIN\n",
- AP_QID_CARD(zq->queue->qid),
- AP_QID_QUEUE(zq->queue->qid),
- (int) msg->hdr.type);
- return -EAGAIN; /* repeat the request on a different device. */
+ AP_QID_QUEUE(zq->queue->qid),
+ (int) msg->hdr.type);
+ ZCRYPT_DBF_ERR("dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
+ AP_QID_CARD(zq->queue->qid),
+ AP_QID_QUEUE(zq->queue->qid),
+ (int) msg->hdr.type);
+ return -EAGAIN;
}
}
@@ -895,15 +905,15 @@ static int convert_response_rng(struct zcrypt_queue *zq,
fallthrough; /* wrong cprb version is an unknown response */
default: /* Unknown response type, this should NEVER EVER happen */
zq->online = 0;
- pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
+ pr_err("Crypto dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
- AP_QID_QUEUE(zq->queue->qid));
- ZCRYPT_DBF(DBF_ERR,
- "device=%02x.%04x rtype=0x%02x => online=0 rc=EAGAIN\n",
- AP_QID_CARD(zq->queue->qid),
- AP_QID_QUEUE(zq->queue->qid),
- (int) msg->hdr.type);
- return -EAGAIN; /* repeat the request on a different device. */
+ AP_QID_QUEUE(zq->queue->qid),
+ (int) msg->hdr.type);
+ ZCRYPT_DBF_ERR("dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
+ AP_QID_CARD(zq->queue->qid),
+ AP_QID_QUEUE(zq->queue->qid),
+ (int) msg->hdr.type);
+ return -EAGAIN;
}
}
@@ -1007,39 +1017,42 @@ static atomic_t zcrypt_step = ATOMIC_INIT(0);
* @mex: pointer to the modexpo request buffer
*/
static long zcrypt_msgtype6_modexpo(struct zcrypt_queue *zq,
- struct ica_rsa_modexpo *mex)
+ struct ica_rsa_modexpo *mex,
+ struct ap_message *ap_msg)
{
- struct ap_message ap_msg;
struct response_type resp_type = {
.type = CEXXC_RESPONSE_TYPE_ICA,
};
int rc;
- ap_init_message(&ap_msg);
- ap_msg.msg = (void *) get_zeroed_page(GFP_KERNEL);
- if (!ap_msg.msg)
+ ap_msg->msg = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!ap_msg->msg)
return -ENOMEM;
- ap_msg.receive = zcrypt_msgtype6_receive;
- ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
- atomic_inc_return(&zcrypt_step);
- ap_msg.private = &resp_type;
- rc = ICAMEX_msg_to_type6MEX_msgX(zq, &ap_msg, mex);
+ ap_msg->receive = zcrypt_msgtype6_receive;
+ ap_msg->psmid = (((unsigned long long) current->pid) << 32) +
+ atomic_inc_return(&zcrypt_step);
+ ap_msg->private = &resp_type;
+ rc = ICAMEX_msg_to_type6MEX_msgX(zq, ap_msg, mex);
if (rc)
goto out_free;
init_completion(&resp_type.work);
- ap_queue_message(zq->queue, &ap_msg);
+ rc = ap_queue_message(zq->queue, ap_msg);
+ if (rc)
+ goto out_free;
rc = wait_for_completion_interruptible(&resp_type.work);
if (rc == 0) {
- rc = ap_msg.rc;
+ rc = ap_msg->rc;
if (rc == 0)
- rc = convert_response_ica(zq, &ap_msg,
+ rc = convert_response_ica(zq, ap_msg,
mex->outputdata,
mex->outputdatalength);
} else
/* Signal pending. */
- ap_cancel_message(zq->queue, &ap_msg);
+ ap_cancel_message(zq->queue, ap_msg);
out_free:
- free_page((unsigned long) ap_msg.msg);
+ free_page((unsigned long) ap_msg->msg);
+ ap_msg->private = NULL;
+ ap_msg->msg = NULL;
return rc;
}
@@ -1051,40 +1064,43 @@ out_free:
* @crt: pointer to the modexpoc_crt request buffer
*/
static long zcrypt_msgtype6_modexpo_crt(struct zcrypt_queue *zq,
- struct ica_rsa_modexpo_crt *crt)
+ struct ica_rsa_modexpo_crt *crt,
+ struct ap_message *ap_msg)
{
- struct ap_message ap_msg;
struct response_type resp_type = {
.type = CEXXC_RESPONSE_TYPE_ICA,
};
int rc;
- ap_init_message(&ap_msg);
- ap_msg.msg = (void *) get_zeroed_page(GFP_KERNEL);
- if (!ap_msg.msg)
+ ap_msg->msg = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!ap_msg->msg)
return -ENOMEM;
- ap_msg.receive = zcrypt_msgtype6_receive;
- ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
- atomic_inc_return(&zcrypt_step);
- ap_msg.private = &resp_type;
- rc = ICACRT_msg_to_type6CRT_msgX(zq, &ap_msg, crt);
+ ap_msg->receive = zcrypt_msgtype6_receive;
+ ap_msg->psmid = (((unsigned long long) current->pid) << 32) +
+ atomic_inc_return(&zcrypt_step);
+ ap_msg->private = &resp_type;
+ rc = ICACRT_msg_to_type6CRT_msgX(zq, ap_msg, crt);
if (rc)
goto out_free;
init_completion(&resp_type.work);
- ap_queue_message(zq->queue, &ap_msg);
+ rc = ap_queue_message(zq->queue, ap_msg);
+ if (rc)
+ goto out_free;
rc = wait_for_completion_interruptible(&resp_type.work);
if (rc == 0) {
- rc = ap_msg.rc;
+ rc = ap_msg->rc;
if (rc == 0)
- rc = convert_response_ica(zq, &ap_msg,
+ rc = convert_response_ica(zq, ap_msg,
crt->outputdata,
crt->outputdatalength);
} else {
/* Signal pending. */
- ap_cancel_message(zq->queue, &ap_msg);
+ ap_cancel_message(zq->queue, ap_msg);
}
out_free:
- free_page((unsigned long) ap_msg.msg);
+ free_page((unsigned long) ap_msg->msg);
+ ap_msg->private = NULL;
+ ap_msg->msg = NULL;
return rc;
}
@@ -1095,9 +1111,9 @@ out_free:
* by the caller with ap_init_message(). Also the caller has to
* make sure ap_release_message() is always called even on failure.
*/
-unsigned int get_cprb_fc(struct ica_xcRB *xcRB,
- struct ap_message *ap_msg,
- unsigned int *func_code, unsigned short **dom)
+unsigned int get_cprb_fc(bool userspace, struct ica_xcRB *xcRB,
+ struct ap_message *ap_msg,
+ unsigned int *func_code, unsigned short **dom)
{
struct response_type resp_type = {
.type = CEXXC_RESPONSE_TYPE_XCRB,
@@ -1112,7 +1128,7 @@ unsigned int get_cprb_fc(struct ica_xcRB *xcRB,
ap_msg->private = kmemdup(&resp_type, sizeof(resp_type), GFP_KERNEL);
if (!ap_msg->private)
return -ENOMEM;
- return XCRB_msg_to_type6CPRB_msgX(ap_msg, xcRB, func_code, dom);
+ return XCRB_msg_to_type6CPRB_msgX(userspace, ap_msg, xcRB, func_code, dom);
}
/**
@@ -1122,24 +1138,26 @@ unsigned int get_cprb_fc(struct ica_xcRB *xcRB,
* CEXxC device to the request distributor
* @xcRB: pointer to the send_cprb request buffer
*/
-static long zcrypt_msgtype6_send_cprb(struct zcrypt_queue *zq,
- struct ica_xcRB *xcRB,
- struct ap_message *ap_msg)
+static long zcrypt_msgtype6_send_cprb(bool userspace, struct zcrypt_queue *zq,
+ struct ica_xcRB *xcRB,
+ struct ap_message *ap_msg)
{
int rc;
struct response_type *rtype = (struct response_type *)(ap_msg->private);
init_completion(&rtype->work);
- ap_queue_message(zq->queue, ap_msg);
+ rc = ap_queue_message(zq->queue, ap_msg);
+ if (rc)
+ goto out;
rc = wait_for_completion_interruptible(&rtype->work);
if (rc == 0) {
rc = ap_msg->rc;
if (rc == 0)
- rc = convert_response_xcrb(zq, ap_msg, xcRB);
+ rc = convert_response_xcrb(userspace, zq, ap_msg, xcRB);
} else
/* Signal pending. */
ap_cancel_message(zq->queue, ap_msg);
-
+out:
return rc;
}
@@ -1150,9 +1168,9 @@ static long zcrypt_msgtype6_send_cprb(struct zcrypt_queue *zq,
* by the caller with ap_init_message(). Also the caller has to
* make sure ap_release_message() is always called even on failure.
*/
-unsigned int get_ep11cprb_fc(struct ep11_urb *xcrb,
- struct ap_message *ap_msg,
- unsigned int *func_code)
+unsigned int get_ep11cprb_fc(bool userspace, struct ep11_urb *xcrb,
+ struct ap_message *ap_msg,
+ unsigned int *func_code)
{
struct response_type resp_type = {
.type = CEXXC_RESPONSE_TYPE_EP11,
@@ -1167,7 +1185,7 @@ unsigned int get_ep11cprb_fc(struct ep11_urb *xcrb,
ap_msg->private = kmemdup(&resp_type, sizeof(resp_type), GFP_KERNEL);
if (!ap_msg->private)
return -ENOMEM;
- return xcrb_msg_to_type6_ep11cprb_msgx(ap_msg, xcrb, func_code);
+ return xcrb_msg_to_type6_ep11cprb_msgx(userspace, ap_msg, xcrb, func_code);
}
/**
@@ -1177,7 +1195,7 @@ unsigned int get_ep11cprb_fc(struct ep11_urb *xcrb,
* CEX4P device to the request distributor
* @xcRB: pointer to the ep11 user request block
*/
-static long zcrypt_msgtype6_send_ep11_cprb(struct zcrypt_queue *zq,
+static long zcrypt_msgtype6_send_ep11_cprb(bool userspace, struct zcrypt_queue *zq,
struct ep11_urb *xcrb,
struct ap_message *ap_msg)
{
@@ -1232,16 +1250,18 @@ static long zcrypt_msgtype6_send_ep11_cprb(struct zcrypt_queue *zq,
}
init_completion(&rtype->work);
- ap_queue_message(zq->queue, ap_msg);
+ rc = ap_queue_message(zq->queue, ap_msg);
+ if (rc)
+ goto out;
rc = wait_for_completion_interruptible(&rtype->work);
if (rc == 0) {
rc = ap_msg->rc;
if (rc == 0)
- rc = convert_response_ep11_xcrb(zq, ap_msg, xcrb);
+ rc = convert_response_ep11_xcrb(userspace, zq, ap_msg, xcrb);
} else
/* Signal pending. */
ap_cancel_message(zq->queue, ap_msg);
-
+out:
return rc;
}
@@ -1293,7 +1313,9 @@ static long zcrypt_msgtype6_rng(struct zcrypt_queue *zq,
msg->cprbx.domain = AP_QID_QUEUE(zq->queue->qid);
init_completion(&rtype->work);
- ap_queue_message(zq->queue, ap_msg);
+ rc = ap_queue_message(zq->queue, ap_msg);
+ if (rc)
+ goto out;
rc = wait_for_completion_interruptible(&rtype->work);
if (rc == 0) {
rc = ap_msg->rc;
@@ -1302,7 +1324,7 @@ static long zcrypt_msgtype6_rng(struct zcrypt_queue *zq,
} else
/* Signal pending. */
ap_cancel_message(zq->queue, ap_msg);
-
+out:
return rc;
}
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.h b/drivers/s390/crypto/zcrypt_msgtype6.h
index 0de280a81dd4..0a0bf074206b 100644
--- a/drivers/s390/crypto/zcrypt_msgtype6.h
+++ b/drivers/s390/crypto/zcrypt_msgtype6.h
@@ -96,9 +96,9 @@ struct type86_fmt2_ext {
unsigned int offset4; /* 0x00000000 */
} __packed;
-unsigned int get_cprb_fc(struct ica_xcRB *, struct ap_message *,
+unsigned int get_cprb_fc(bool userspace, struct ica_xcRB *, struct ap_message *,
unsigned int *, unsigned short **);
-unsigned int get_ep11cprb_fc(struct ep11_urb *, struct ap_message *,
+unsigned int get_ep11cprb_fc(bool userspace, struct ep11_urb *, struct ap_message *,
unsigned int *);
unsigned int get_rng_fc(struct ap_message *, int *, unsigned int *);
diff --git a/drivers/s390/crypto/zcrypt_queue.c b/drivers/s390/crypto/zcrypt_queue.c
index 8bae6ad159a7..3c207066313c 100644
--- a/drivers/s390/crypto/zcrypt_queue.c
+++ b/drivers/s390/crypto/zcrypt_queue.c
@@ -40,22 +40,27 @@ static ssize_t online_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct zcrypt_queue *zq = to_ap_queue(dev)->private;
+ struct ap_queue *aq = to_ap_queue(dev);
+ struct zcrypt_queue *zq = aq->private;
+ int online = aq->config && zq->online ? 1 : 0;
- return scnprintf(buf, PAGE_SIZE, "%d\n", zq->online);
+ return scnprintf(buf, PAGE_SIZE, "%d\n", online);
}
static ssize_t online_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
- struct zcrypt_queue *zq = to_ap_queue(dev)->private;
+ struct ap_queue *aq = to_ap_queue(dev);
+ struct zcrypt_queue *zq = aq->private;
struct zcrypt_card *zc = zq->zcard;
int online;
if (sscanf(buf, "%d\n", &online) != 1 || online < 0 || online > 1)
return -EINVAL;
+ if (online && (!aq->config || !aq->card->config))
+ return -ENODEV;
if (online && !zc->online)
return -EINVAL;
zq->online = online;
diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c
index e78d65bd46b1..a8a514074084 100644
--- a/drivers/s390/scsi/zfcp_qdio.c
+++ b/drivers/s390/scsi/zfcp_qdio.c
@@ -380,8 +380,6 @@ int zfcp_qdio_open(struct zfcp_qdio *qdio)
&qdio->adapter->status);
init_data.q_format = QDIO_ZFCP_QFMT;
- memcpy(init_data.adapter_name, dev_name(&cdev->dev), 8);
- ASCEBC(init_data.adapter_name, 8);
init_data.qib_rflags = QIB_RFLAGS_ENABLE_DATA_DIV;
if (enable_multibuffer)
init_data.qdr_ac |= QDR_AC_MULTI_BUFFER_ENABLE;
diff --git a/drivers/scsi/cxlflash/ocxl_hw.c b/drivers/scsi/cxlflash/ocxl_hw.c
index 7018cd802569..e4e0d767b98e 100644
--- a/drivers/scsi/cxlflash/ocxl_hw.c
+++ b/drivers/scsi/cxlflash/ocxl_hw.c
@@ -15,7 +15,8 @@
#include <linux/pseudo_fs.h>
#include <linux/poll.h>
#include <linux/sched/signal.h>
-
+#include <linux/interrupt.h>
+#include <asm/xive.h>
#include <misc/ocxl.h>
#include <uapi/misc/cxl.h>
@@ -180,7 +181,7 @@ static int afu_map_irq(u64 flags, struct ocxlflash_context *ctx, int num,
struct ocxl_hw_afu *afu = ctx->hw_afu;
struct device *dev = afu->dev;
struct ocxlflash_irqs *irq;
- void __iomem *vtrig;
+ struct xive_irq_data *xd;
u32 virq;
int rc = 0;
@@ -204,15 +205,15 @@ static int afu_map_irq(u64 flags, struct ocxlflash_context *ctx, int num,
goto err1;
}
- vtrig = ioremap(irq->ptrig, PAGE_SIZE);
- if (unlikely(!vtrig)) {
- dev_err(dev, "%s: Trigger page mapping failed\n", __func__);
- rc = -ENOMEM;
+ xd = irq_get_handler_data(virq);
+ if (unlikely(!xd)) {
+ dev_err(dev, "%s: Can't get interrupt data\n", __func__);
+ rc = -ENXIO;
goto err2;
}
irq->virq = virq;
- irq->vtrig = vtrig;
+ irq->vtrig = xd->trig_mmio;
out:
return rc;
err2:
@@ -259,8 +260,6 @@ static void afu_unmap_irq(u64 flags, struct ocxlflash_context *ctx, int num,
}
irq = &ctx->irqs[num];
- if (irq->vtrig)
- iounmap(irq->vtrig);
if (irq_find_mapping(NULL, irq->hwirq)) {
free_irq(irq->virq, cookie);
@@ -615,7 +614,6 @@ static int alloc_afu_irqs(struct ocxlflash_context *ctx, int num)
struct ocxl_hw_afu *afu = ctx->hw_afu;
struct device *dev = afu->dev;
struct ocxlflash_irqs *irqs;
- u64 addr;
int rc = 0;
int hwirq;
int i;
@@ -640,7 +638,7 @@ static int alloc_afu_irqs(struct ocxlflash_context *ctx, int num)
}
for (i = 0; i < num; i++) {
- rc = ocxl_link_irq_alloc(afu->link_token, &hwirq, &addr);
+ rc = ocxl_link_irq_alloc(afu->link_token, &hwirq);
if (unlikely(rc)) {
dev_err(dev, "%s: ocxl_link_irq_alloc failed rc=%d\n",
__func__, rc);
@@ -648,7 +646,6 @@ static int alloc_afu_irqs(struct ocxlflash_context *ctx, int num)
}
irqs[i].hwirq = hwirq;
- irqs[i].ptrig = addr;
}
ctx->irqs = irqs;
diff --git a/drivers/scsi/cxlflash/ocxl_hw.h b/drivers/scsi/cxlflash/ocxl_hw.h
index fc6ad4f985de..f2fe88816bea 100644
--- a/drivers/scsi/cxlflash/ocxl_hw.h
+++ b/drivers/scsi/cxlflash/ocxl_hw.h
@@ -13,7 +13,6 @@
struct ocxlflash_irqs {
int hwirq;
u32 virq;
- u64 ptrig;
void __iomem *vtrig;
};
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index b668224f906d..7edc8dc6bbab 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -346,13 +346,13 @@ config RCAR_THERMAL
thermal framework.
config RCAR_GEN3_THERMAL
- tristate "Renesas R-Car Gen3 thermal driver"
+ tristate "Renesas R-Car Gen3 and RZ/G2 thermal driver"
depends on ARCH_RENESAS || COMPILE_TEST
depends on HAS_IOMEM
depends on OF
help
- Enable this to plug the R-Car Gen3 thermal sensor driver into the Linux
- thermal framework.
+ Enable this to plug the R-Car Gen3 or RZ/G2 thermal sensor driver into
+ the Linux thermal framework.
config KIRKWOOD_THERMAL
tristate "Temperature sensor on Marvell Kirkwood SoCs"
diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c
index 6cf23a54e853..cc2959f22f01 100644
--- a/drivers/thermal/cpufreq_cooling.c
+++ b/drivers/thermal/cpufreq_cooling.c
@@ -182,7 +182,6 @@ static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_cdev,
/**
* cpufreq_get_requested_power() - get the current power
* @cdev: &thermal_cooling_device pointer
- * @tz: a valid thermal zone device pointer
* @power: pointer in which to store the resulting power
*
* Calculate the current power consumption of the cpus in milliwatts
@@ -203,7 +202,6 @@ static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_cdev,
* Return: 0 on success, -E* if getting the static power failed.
*/
static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
- struct thermal_zone_device *tz,
u32 *power)
{
unsigned long freq;
@@ -253,7 +251,6 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
/**
* cpufreq_state2power() - convert a cpu cdev state to power consumed
* @cdev: &thermal_cooling_device pointer
- * @tz: a valid thermal zone device pointer
* @state: cooling device state to be converted
* @power: pointer in which to store the resulting power
*
@@ -266,7 +263,6 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
* when calculating the static power.
*/
static int cpufreq_state2power(struct thermal_cooling_device *cdev,
- struct thermal_zone_device *tz,
unsigned long state, u32 *power)
{
unsigned int freq, num_cpus, idx;
@@ -288,7 +284,6 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev,
/**
* cpufreq_power2state() - convert power to a cooling device state
* @cdev: &thermal_cooling_device pointer
- * @tz: a valid thermal zone device pointer
* @power: power in milliwatts to be converted
* @state: pointer in which to store the resulting state
*
@@ -306,8 +301,7 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev,
* device.
*/
static int cpufreq_power2state(struct thermal_cooling_device *cdev,
- struct thermal_zone_device *tz, u32 power,
- unsigned long *state)
+ u32 power, unsigned long *state)
{
unsigned int target_freq;
u32 last_load, normalised_power;
diff --git a/drivers/thermal/cpuidle_cooling.c b/drivers/thermal/cpuidle_cooling.c
index 78e3e8238116..7ecab4b16b29 100644
--- a/drivers/thermal/cpuidle_cooling.c
+++ b/drivers/thermal/cpuidle_cooling.c
@@ -30,7 +30,7 @@ static DEFINE_IDA(cpuidle_ida);
/**
* cpuidle_cooling_runtime - Running time computation
- * @idle_duration_us: the idle cooling device
+ * @idle_duration_us: CPU idle time to inject in microseconds
* @state: a percentile based number
*
* The running duration is computed from the idle injection duration
diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
index a12d29096229..dfab49a67252 100644
--- a/drivers/thermal/devfreq_cooling.c
+++ b/drivers/thermal/devfreq_cooling.c
@@ -229,7 +229,6 @@ static inline unsigned long get_total_power(struct devfreq_cooling_device *dfc,
static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cdev,
- struct thermal_zone_device *tz,
u32 *power)
{
struct devfreq_cooling_device *dfc = cdev->devdata;
@@ -289,7 +288,6 @@ fail:
}
static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev,
- struct thermal_zone_device *tz,
unsigned long state,
u32 *power)
{
@@ -308,7 +306,6 @@ static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev,
}
static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev,
- struct thermal_zone_device *tz,
u32 power, unsigned long *state)
{
struct devfreq_cooling_device *dfc = cdev->devdata;
diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c
index 5cb518d8f156..ab0be26f0816 100644
--- a/drivers/thermal/gov_power_allocator.c
+++ b/drivers/thermal/gov_power_allocator.c
@@ -96,7 +96,7 @@ static u32 estimate_sustainable_power(struct thermal_zone_device *tz)
if (instance->trip != params->trip_max_desired_temperature)
continue;
- if (power_actor_get_min_power(cdev, tz, &min_power))
+ if (power_actor_get_min_power(cdev, &min_power))
continue;
sustainable_power += min_power;
@@ -388,7 +388,7 @@ static int allocate_power(struct thermal_zone_device *tz,
if (!cdev_is_power_actor(cdev))
continue;
- if (cdev->ops->get_requested_power(cdev, tz, &req_power[i]))
+ if (cdev->ops->get_requested_power(cdev, &req_power[i]))
continue;
if (!total_weight)
@@ -398,7 +398,7 @@ static int allocate_power(struct thermal_zone_device *tz,
weighted_req_power[i] = frac_to_int(weight * req_power[i]);
- if (power_actor_get_max_power(cdev, tz, &max_power[i]))
+ if (power_actor_get_max_power(cdev, &max_power[i]))
continue;
total_req_power += req_power[i];
diff --git a/drivers/thermal/imx8mm_thermal.c b/drivers/thermal/imx8mm_thermal.c
index f5124f14cf81..a1e4f9bb4cb0 100644
--- a/drivers/thermal/imx8mm_thermal.c
+++ b/drivers/thermal/imx8mm_thermal.c
@@ -146,13 +146,9 @@ static int imx8mm_tmu_probe(struct platform_device *pdev)
return PTR_ERR(tmu->base);
tmu->clk = devm_clk_get(&pdev->dev, NULL);
- if (IS_ERR(tmu->clk)) {
- ret = PTR_ERR(tmu->clk);
- if (ret != -EPROBE_DEFER)
- dev_err(&pdev->dev,
- "failed to get tmu clock: %d\n", ret);
- return ret;
- }
+ if (IS_ERR(tmu->clk))
+ return dev_err_probe(&pdev->dev, PTR_ERR(tmu->clk),
+ "failed to get tmu clock\n");
ret = clk_prepare_enable(tmu->clk);
if (ret) {
diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
index 3f74ab4c1ab9..2c7473d86a59 100644
--- a/drivers/thermal/imx_thermal.c
+++ b/drivers/thermal/imx_thermal.c
@@ -716,14 +716,9 @@ static int imx_thermal_probe(struct platform_device *pdev)
if (of_find_property(pdev->dev.of_node, "nvmem-cells", NULL)) {
ret = imx_init_from_nvmem_cells(pdev);
- if (ret) {
- if (ret == -EPROBE_DEFER)
- return ret;
-
- dev_err(&pdev->dev, "failed to init from nvmem: %d\n",
- ret);
- return ret;
- }
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret,
+ "failed to init from nvmem\n");
} else {
ret = imx_init_from_tempmon_data(pdev);
if (ret) {
@@ -746,14 +741,9 @@ static int imx_thermal_probe(struct platform_device *pdev)
data->socdata->power_down_mask);
ret = imx_thermal_register_legacy_cooling(data);
- if (ret) {
- if (ret == -EPROBE_DEFER)
- return ret;
-
- dev_err(&pdev->dev,
- "failed to register cpufreq cooling device: %d\n", ret);
- return ret;
- }
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret,
+ "failed to register cpufreq cooling device\n");
data->thermal_clk = devm_clk_get(&pdev->dev, NULL);
if (IS_ERR(data->thermal_clk)) {
diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
index 4f5859d4c780..0966551cbaaa 100644
--- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
+++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
@@ -14,6 +14,7 @@
#define INT3400_THERMAL_TABLE_CHANGED 0x83
#define INT3400_ODVP_CHANGED 0x88
+#define INT3400_KEEP_ALIVE 0xA0
enum int3400_thermal_uuid {
INT3400_THERMAL_PASSIVE_1,
@@ -83,8 +84,33 @@ static struct bin_attribute *data_attributes[] = {
NULL,
};
+static ssize_t imok_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct int3400_thermal_priv *priv = dev_get_drvdata(dev);
+ acpi_status status;
+ int input, ret;
+
+ ret = kstrtouint(buf, 10, &input);
+ if (ret)
+ return ret;
+ status = acpi_execute_simple_method(priv->adev->handle, "IMOK", input);
+ if (ACPI_FAILURE(status))
+ return -EIO;
+
+ return count;
+}
+
+static DEVICE_ATTR_WO(imok);
+
+static struct attribute *imok_attr[] = {
+ &dev_attr_imok.attr,
+ NULL
+};
+
static const struct attribute_group data_attribute_group = {
.bin_attrs = data_attributes,
+ .attrs = imok_attr,
};
static ssize_t available_uuids_show(struct device *dev,
@@ -349,30 +375,33 @@ static void int3400_notify(acpi_handle handle,
{
struct int3400_thermal_priv *priv = data;
char *thermal_prop[5];
+ int therm_event;
if (!priv)
return;
switch (event) {
case INT3400_THERMAL_TABLE_CHANGED:
- thermal_prop[0] = kasprintf(GFP_KERNEL, "NAME=%s",
- priv->thermal->type);
- thermal_prop[1] = kasprintf(GFP_KERNEL, "TEMP=%d",
- priv->thermal->temperature);
- thermal_prop[2] = kasprintf(GFP_KERNEL, "TRIP=");
- thermal_prop[3] = kasprintf(GFP_KERNEL, "EVENT=%d",
- THERMAL_TABLE_CHANGED);
- thermal_prop[4] = NULL;
- kobject_uevent_env(&priv->thermal->device.kobj, KOBJ_CHANGE,
- thermal_prop);
+ therm_event = THERMAL_TABLE_CHANGED;
+ break;
+ case INT3400_KEEP_ALIVE:
+ therm_event = THERMAL_EVENT_KEEP_ALIVE;
break;
case INT3400_ODVP_CHANGED:
evaluate_odvp(priv);
+ therm_event = THERMAL_DEVICE_POWER_CAPABILITY_CHANGED;
break;
default:
/* Ignore unknown notification codes sent to INT3400 device */
- break;
+ return;
}
+
+ thermal_prop[0] = kasprintf(GFP_KERNEL, "NAME=%s", priv->thermal->type);
+ thermal_prop[1] = kasprintf(GFP_KERNEL, "TEMP=%d", priv->thermal->temperature);
+ thermal_prop[2] = kasprintf(GFP_KERNEL, "TRIP=");
+ thermal_prop[3] = kasprintf(GFP_KERNEL, "EVENT=%d", therm_event);
+ thermal_prop[4] = NULL;
+ kobject_uevent_env(&priv->thermal->device.kobj, KOBJ_CHANGE, thermal_prop);
}
static int int3400_thermal_get_temp(struct thermal_zone_device *thermal,
diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
index 787710bb88fe..5c2a13bf249c 100644
--- a/drivers/thermal/rcar_thermal.c
+++ b/drivers/thermal/rcar_thermal.c
@@ -546,11 +546,11 @@ static int rcar_thermal_probe(struct platform_device *pdev)
if (ret < 0)
goto error_unregister;
- if (chip->use_of_thermal)
+ if (chip->use_of_thermal) {
priv->zone = devm_thermal_zone_of_sensor_register(
dev, i, priv,
&rcar_thermal_zone_of_ops);
- else {
+ } else {
priv->zone = thermal_zone_device_register(
"rcar_thermal",
1, 0, priv,
diff --git a/drivers/thermal/st/Kconfig b/drivers/thermal/st/Kconfig
index 3c3b695cc3e9..58ece381956b 100644
--- a/drivers/thermal/st/Kconfig
+++ b/drivers/thermal/st/Kconfig
@@ -23,5 +23,5 @@ config STM32_THERMAL
help
Support for thermal framework on STMicroelectronics STM32 series of
SoCs. This thermal driver allows to access to general thermal framework
- functionalities and to acces to SoC sensor functionalities. This
+ functionalities and to access to SoC sensor functionalities. This
configuration is fully dependent of MACH_STM32MP157.
diff --git a/drivers/thermal/st/stm_thermal.c b/drivers/thermal/st/stm_thermal.c
index 331e2b768df5..5fd3fb8912a6 100644
--- a/drivers/thermal/st/stm_thermal.c
+++ b/drivers/thermal/st/stm_thermal.c
@@ -446,14 +446,9 @@ thermal_unprepare:
#ifdef CONFIG_PM_SLEEP
static int stm_thermal_suspend(struct device *dev)
{
- int ret;
struct stm_thermal_sensor *sensor = dev_get_drvdata(dev);
- ret = stm_thermal_sensor_off(sensor);
- if (ret)
- return ret;
-
- return 0;
+ return stm_thermal_sensor_off(sensor);
}
static int stm_thermal_resume(struct device *dev)
diff --git a/drivers/thermal/sun8i_thermal.c b/drivers/thermal/sun8i_thermal.c
index 74d73be16496..f8b13071a6f4 100644
--- a/drivers/thermal/sun8i_thermal.c
+++ b/drivers/thermal/sun8i_thermal.c
@@ -244,7 +244,7 @@ static int sun50i_h6_ths_calibrate(struct ths_device *tmdev,
ft_temp = (caldata[0] & FT_TEMP_MASK) * 100;
for (i = 0; i < tmdev->chip->sensor_num; i++) {
- int sensor_reg = caldata[i + 1];
+ int sensor_reg = caldata[i + 1] & TEMP_CALIB_MASK;
int cdata, offset;
int sensor_temp = tmdev->chip->calc_temp(tmdev, i, sensor_reg);
@@ -590,6 +590,19 @@ static const struct ths_thermal_chip sun50i_a64_ths = {
.calc_temp = sun8i_ths_calc_temp,
};
+static const struct ths_thermal_chip sun50i_a100_ths = {
+ .sensor_num = 3,
+ .has_bus_clk_reset = true,
+ .ft_deviation = 8000,
+ .offset = 187744,
+ .scale = 672,
+ .temp_data_base = SUN50I_H6_THS_TEMP_DATA,
+ .calibrate = sun50i_h6_ths_calibrate,
+ .init = sun50i_h6_thermal_init,
+ .irq_ack = sun50i_h6_irq_ack,
+ .calc_temp = sun8i_ths_calc_temp,
+};
+
static const struct ths_thermal_chip sun50i_h5_ths = {
.sensor_num = 2,
.has_mod_clk = true,
@@ -619,6 +632,7 @@ static const struct of_device_id of_ths_match[] = {
{ .compatible = "allwinner,sun8i-h3-ths", .data = &sun8i_h3_ths },
{ .compatible = "allwinner,sun8i-r40-ths", .data = &sun8i_r40_ths },
{ .compatible = "allwinner,sun50i-a64-ths", .data = &sun50i_a64_ths },
+ { .compatible = "allwinner,sun50i-a100-ths", .data = &sun50i_a100_ths },
{ .compatible = "allwinner,sun50i-h5-ths", .data = &sun50i_h5_ths },
{ .compatible = "allwinner,sun50i-h6-ths", .data = &sun50i_h6_ths },
{ /* sentinel */ },
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index a6616e530a84..c6d74bc1c90b 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -603,7 +603,6 @@ static void thermal_zone_device_check(struct work_struct *work)
/**
* power_actor_get_max_power() - get the maximum power that a cdev can consume
* @cdev: pointer to &thermal_cooling_device
- * @tz: a valid thermal zone device pointer
* @max_power: pointer in which to store the maximum power
*
* Calculate the maximum power consumption in milliwats that the
@@ -613,18 +612,17 @@ static void thermal_zone_device_check(struct work_struct *work)
* power_actor API or -E* on other error.
*/
int power_actor_get_max_power(struct thermal_cooling_device *cdev,
- struct thermal_zone_device *tz, u32 *max_power)
+ u32 *max_power)
{
if (!cdev_is_power_actor(cdev))
return -EINVAL;
- return cdev->ops->state2power(cdev, tz, 0, max_power);
+ return cdev->ops->state2power(cdev, 0, max_power);
}
/**
* power_actor_get_min_power() - get the mainimum power that a cdev can consume
* @cdev: pointer to &thermal_cooling_device
- * @tz: a valid thermal zone device pointer
* @min_power: pointer in which to store the minimum power
*
* Calculate the minimum power consumption in milliwatts that the
@@ -634,7 +632,7 @@ int power_actor_get_max_power(struct thermal_cooling_device *cdev,
* power_actor API or -E* on other error.
*/
int power_actor_get_min_power(struct thermal_cooling_device *cdev,
- struct thermal_zone_device *tz, u32 *min_power)
+ u32 *min_power)
{
unsigned long max_state;
int ret;
@@ -646,7 +644,7 @@ int power_actor_get_min_power(struct thermal_cooling_device *cdev,
if (ret)
return ret;
- return cdev->ops->state2power(cdev, tz, max_state, min_power);
+ return cdev->ops->state2power(cdev, max_state, min_power);
}
/**
@@ -670,7 +668,7 @@ int power_actor_set_power(struct thermal_cooling_device *cdev,
if (!cdev_is_power_actor(cdev))
return -EINVAL;
- ret = cdev->ops->power2state(cdev, instance->tz, power, &state);
+ ret = cdev->ops->power2state(cdev, power, &state);
if (ret)
return ret;
@@ -1652,7 +1650,6 @@ static int __init thermal_init(void)
if (result)
goto error;
- mutex_init(&poweroff_lock);
result = thermal_register_governors();
if (result)
goto error;
diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index e00fc5585ea8..764c2de31771 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -66,9 +66,9 @@ static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev)
}
int power_actor_get_max_power(struct thermal_cooling_device *cdev,
- struct thermal_zone_device *tz, u32 *max_power);
+ u32 *max_power);
int power_actor_get_min_power(struct thermal_cooling_device *cdev,
- struct thermal_zone_device *tz, u32 *min_power);
+ u32 *min_power);
int power_actor_set_power(struct thermal_cooling_device *cdev,
struct thermal_instance *ti, u32 power);
/**
diff --git a/drivers/thermal/thermal_netlink.c b/drivers/thermal/thermal_netlink.c
index da2891fcac89..1234dbe95895 100644
--- a/drivers/thermal/thermal_netlink.c
+++ b/drivers/thermal/thermal_netlink.c
@@ -78,7 +78,7 @@ int thermal_genl_sampling_temp(int id, int temp)
hdr = genlmsg_put(skb, 0, 0, &thermal_gnl_family, 0,
THERMAL_GENL_SAMPLING_TEMP);
if (!hdr)
- return -EMSGSIZE;
+ goto out_free;
if (nla_put_u32(skb, THERMAL_GENL_ATTR_TZ_ID, id))
goto out_cancel;
@@ -93,6 +93,7 @@ int thermal_genl_sampling_temp(int id, int temp)
return 0;
out_cancel:
genlmsg_cancel(skb, hdr);
+out_free:
nlmsg_free(skb);
return -EMSGSIZE;
diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
index 8c231219e15d..a6f371fc9af2 100644
--- a/drivers/thermal/thermal_sysfs.c
+++ b/drivers/thermal/thermal_sysfs.c
@@ -448,7 +448,7 @@ static umode_t thermal_zone_passive_is_visible(struct kobject *kobj,
struct attribute *attr,
int attrno)
{
- struct device *dev = container_of(kobj, struct device, kobj);
+ struct device *dev = kobj_to_dev(kobj);
struct thermal_zone_device *tz;
enum thermal_trip_type trip_type;
int count, passive = 0;
diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.c b/drivers/thermal/ti-soc-thermal/ti-bandgap.c
index ab19ceff6e2a..5e596168ba73 100644
--- a/drivers/thermal/ti-soc-thermal/ti-bandgap.c
+++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.c
@@ -25,10 +25,20 @@
#include <linux/of_platform.h>
#include <linux/of_irq.h>
#include <linux/io.h>
+#include <linux/cpu_pm.h>
+#include <linux/device.h>
+#include <linux/pm_runtime.h>
+#include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
#include "ti-bandgap.h"
static int ti_bandgap_force_single_read(struct ti_bandgap *bgp, int id);
+#ifdef CONFIG_PM_SLEEP
+static int bandgap_omap_cpu_notifier(struct notifier_block *nb,
+ unsigned long cmd, void *v);
+#endif
/*** Helper functions to access registers and their bitfields ***/
@@ -1008,6 +1018,11 @@ int ti_bandgap_probe(struct platform_device *pdev)
}
}
+#ifdef CONFIG_PM_SLEEP
+ bgp->nb.notifier_call = bandgap_omap_cpu_notifier;
+ cpu_pm_register_notifier(&bgp->nb);
+#endif
+
return 0;
remove_last_cooling:
@@ -1041,7 +1056,9 @@ int ti_bandgap_remove(struct platform_device *pdev)
struct ti_bandgap *bgp = platform_get_drvdata(pdev);
int i;
- /* First thing is to remove sensor interfaces */
+ cpu_pm_unregister_notifier(&bgp->nb);
+
+ /* Remove sensor interfaces */
for (i = 0; i < bgp->conf->sensor_count; i++) {
if (bgp->conf->sensors[i].unregister_cooling)
bgp->conf->sensors[i].unregister_cooling(bgp, i);
@@ -1150,9 +1167,43 @@ static int ti_bandgap_suspend(struct device *dev)
if (TI_BANDGAP_HAS(bgp, CLK_CTRL))
clk_disable_unprepare(bgp->fclock);
+ bgp->is_suspended = true;
+
return err;
}
+static int bandgap_omap_cpu_notifier(struct notifier_block *nb,
+ unsigned long cmd, void *v)
+{
+ struct ti_bandgap *bgp;
+
+ bgp = container_of(nb, struct ti_bandgap, nb);
+
+ spin_lock(&bgp->lock);
+ switch (cmd) {
+ case CPU_CLUSTER_PM_ENTER:
+ if (bgp->is_suspended)
+ break;
+ ti_bandgap_save_ctxt(bgp);
+ ti_bandgap_power(bgp, false);
+ if (TI_BANDGAP_HAS(bgp, CLK_CTRL))
+ clk_disable(bgp->fclock);
+ break;
+ case CPU_CLUSTER_PM_ENTER_FAILED:
+ case CPU_CLUSTER_PM_EXIT:
+ if (bgp->is_suspended)
+ break;
+ if (TI_BANDGAP_HAS(bgp, CLK_CTRL))
+ clk_enable(bgp->fclock);
+ ti_bandgap_power(bgp, true);
+ ti_bandgap_restore_ctxt(bgp);
+ break;
+ }
+ spin_unlock(&bgp->lock);
+
+ return NOTIFY_OK;
+}
+
static int ti_bandgap_resume(struct device *dev)
{
struct ti_bandgap *bgp = dev_get_drvdata(dev);
@@ -1161,6 +1212,7 @@ static int ti_bandgap_resume(struct device *dev)
clk_prepare_enable(bgp->fclock);
ti_bandgap_power(bgp, true);
+ bgp->is_suspended = false;
return ti_bandgap_restore_ctxt(bgp);
}
diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.h b/drivers/thermal/ti-soc-thermal/ti-bandgap.h
index fce4657e9486..ed0ea4b17b25 100644
--- a/drivers/thermal/ti-soc-thermal/ti-bandgap.h
+++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.h
@@ -12,6 +12,10 @@
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/err.h>
+#include <linux/cpu_pm.h>
+#include <linux/device.h>
+#include <linux/pm_runtime.h>
+#include <linux/pm.h>
struct gpio_desc;
@@ -203,6 +207,8 @@ struct ti_bandgap {
int irq;
struct gpio_desc *tshut_gpiod;
u32 clk_rate;
+ struct notifier_block nb;
+ unsigned int is_suspended:1;
};
/**
diff --git a/drivers/tty/serial/sb1250-duart.c b/drivers/tty/serial/sb1250-duart.c
index bd5e7e9938ce..22c7bc90b104 100644
--- a/drivers/tty/serial/sb1250-duart.c
+++ b/drivers/tty/serial/sb1250-duart.c
@@ -35,7 +35,6 @@
#include <linux/refcount.h>
#include <asm/io.h>
-#include <asm/war.h>
#include <asm/sibyte/sb1250.h>
#include <asm/sibyte/sb1250_uart.h>
@@ -157,7 +156,7 @@ static unsigned char read_sbdchn(struct sbd_port *sport, int reg)
unsigned char retval;
retval = __read_sbdchn(sport, reg);
- if (SIBYTE_1956_WAR)
+ if (IS_ENABLED(CONFIG_SB1_PASS_2_WORKAROUNDS))
__war_sbd1956(sport);
return retval;
}
@@ -167,7 +166,7 @@ static unsigned char read_sbdshr(struct sbd_port *sport, int reg)
unsigned char retval;
retval = __read_sbdshr(sport, reg);
- if (SIBYTE_1956_WAR)
+ if (IS_ENABLED(CONFIG_SB1_PASS_2_WORKAROUNDS))
__war_sbd1956(sport);
return retval;
}
@@ -175,14 +174,14 @@ static unsigned char read_sbdshr(struct sbd_port *sport, int reg)
static void write_sbdchn(struct sbd_port *sport, int reg, unsigned int value)
{
__write_sbdchn(sport, reg, value);
- if (SIBYTE_1956_WAR)
+ if (IS_ENABLED(CONFIG_SB1_PASS_2_WORKAROUNDS))
__war_sbd1956(sport);
}
static void write_sbdshr(struct sbd_port *sport, int reg, unsigned int value)
{
__write_sbdshr(sport, reg, value);
- if (SIBYTE_1956_WAR)
+ if (IS_ENABLED(CONFIG_SB1_PASS_2_WORKAROUNDS))
__war_sbd1956(sport);
}
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 1ab1f5cda4ac..b0f4b92a87ed 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -1480,31 +1480,29 @@ static int vfio_pci_zap_and_vma_lock(struct vfio_pci_device *vdev, bool try)
} else {
mmap_read_lock(mm);
}
- if (mmget_still_valid(mm)) {
- if (try) {
- if (!mutex_trylock(&vdev->vma_lock)) {
- mmap_read_unlock(mm);
- mmput(mm);
- return 0;
- }
- } else {
- mutex_lock(&vdev->vma_lock);
+ if (try) {
+ if (!mutex_trylock(&vdev->vma_lock)) {
+ mmap_read_unlock(mm);
+ mmput(mm);
+ return 0;
}
- list_for_each_entry_safe(mmap_vma, tmp,
- &vdev->vma_list, vma_next) {
- struct vm_area_struct *vma = mmap_vma->vma;
+ } else {
+ mutex_lock(&vdev->vma_lock);
+ }
+ list_for_each_entry_safe(mmap_vma, tmp,
+ &vdev->vma_list, vma_next) {
+ struct vm_area_struct *vma = mmap_vma->vma;
- if (vma->vm_mm != mm)
- continue;
+ if (vma->vm_mm != mm)
+ continue;
- list_del(&mmap_vma->vma_next);
- kfree(mmap_vma);
+ list_del(&mmap_vma->vma_next);
+ kfree(mmap_vma);
- zap_vma_ptes(vma, vma->vm_start,
- vma->vm_end - vma->vm_start);
- }
- mutex_unlock(&vdev->vma_lock);
+ zap_vma_ptes(vma, vma->vm_start,
+ vma->vm_end - vma->vm_start);
}
+ mutex_unlock(&vdev->vma_lock);
mmap_read_unlock(mm);
mmput(mm);
}
diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index 834b7c13ef3d..ba4de598f663 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -424,7 +424,8 @@ static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id)
dev_dbg(&vm->vdev->dev, "adding memory block: %lu\n", mb_id);
return add_memory_driver_managed(nid, addr, memory_block_size_bytes(),
- vm->resource_name);
+ vm->resource_name,
+ MEMHP_MERGE_RESOURCE);
}
/*
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 51427c752b37..b57b2067ecbf 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -331,7 +331,7 @@ static enum bp_state reserve_additional_memory(void)
mutex_unlock(&balloon_mutex);
/* add_memory_resource() requires the device_hotplug lock */
lock_device_hotplug();
- rc = add_memory_resource(nid, resource);
+ rc = add_memory_resource(nid, resource, MEMHP_MERGE_RESOURCE);
unlock_device_hotplug();
mutex_lock(&balloon_mutex);
diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c
index 64df919a2111..fe5ad0e89cd8 100644
--- a/drivers/xen/events/events_2l.c
+++ b/drivers/xen/events/events_2l.c
@@ -91,6 +91,8 @@ static void evtchn_2l_unmask(evtchn_port_t port)
BUG_ON(!irqs_disabled());
+ smp_wmb(); /* All writes before unmask must be visible. */
+
if (unlikely((cpu != cpu_from_evtchn(port))))
do_hypercall = 1;
else {
@@ -159,7 +161,7 @@ static inline xen_ulong_t active_evtchns(unsigned int cpu,
* a bitset of words which contain pending event bits. The second
* level is a bitset of pending events themselves.
*/
-static void evtchn_2l_handle_events(unsigned cpu)
+static void evtchn_2l_handle_events(unsigned cpu, struct evtchn_loop_ctrl *ctrl)
{
int irq;
xen_ulong_t pending_words;
@@ -240,10 +242,7 @@ static void evtchn_2l_handle_events(unsigned cpu)
/* Process port. */
port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx;
- irq = get_evtchn_to_irq(port);
-
- if (irq != -1)
- generic_handle_irq(irq);
+ handle_irq_for_port(port, ctrl);
bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 6f02c18fa65c..cc317739e786 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -33,6 +33,10 @@
#include <linux/slab.h>
#include <linux/irqnr.h>
#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/cpuhotplug.h>
+#include <linux/atomic.h>
+#include <linux/ktime.h>
#ifdef CONFIG_X86
#include <asm/desc.h>
@@ -63,6 +67,15 @@
#include "events_internal.h"
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "xen."
+
+static uint __read_mostly event_loop_timeout = 2;
+module_param(event_loop_timeout, uint, 0644);
+
+static uint __read_mostly event_eoi_delay = 10;
+module_param(event_eoi_delay, uint, 0644);
+
const struct evtchn_ops *evtchn_ops;
/*
@@ -71,6 +84,24 @@ const struct evtchn_ops *evtchn_ops;
*/
static DEFINE_MUTEX(irq_mapping_update_lock);
+/*
+ * Lock protecting event handling loop against removing event channels.
+ * Adding of event channels is no issue as the associated IRQ becomes active
+ * only after everything is setup (before request_[threaded_]irq() the handler
+ * can't be entered for an event, as the event channel will be unmasked only
+ * then).
+ */
+static DEFINE_RWLOCK(evtchn_rwlock);
+
+/*
+ * Lock hierarchy:
+ *
+ * irq_mapping_update_lock
+ * evtchn_rwlock
+ * IRQ-desc lock
+ * percpu eoi_list_lock
+ */
+
static LIST_HEAD(xen_irq_list_head);
/* IRQ <-> VIRQ mapping. */
@@ -95,17 +126,20 @@ static bool (*pirq_needs_eoi)(unsigned irq);
static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY];
static struct irq_chip xen_dynamic_chip;
+static struct irq_chip xen_lateeoi_chip;
static struct irq_chip xen_percpu_chip;
static struct irq_chip xen_pirq_chip;
static void enable_dynirq(struct irq_data *data);
static void disable_dynirq(struct irq_data *data);
+static DEFINE_PER_CPU(unsigned int, irq_epoch);
+
static void clear_evtchn_to_irq_row(unsigned row)
{
unsigned col;
for (col = 0; col < EVTCHN_PER_ROW; col++)
- evtchn_to_irq[row][col] = -1;
+ WRITE_ONCE(evtchn_to_irq[row][col], -1);
}
static void clear_evtchn_to_irq_all(void)
@@ -142,7 +176,7 @@ static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
clear_evtchn_to_irq_row(row);
}
- evtchn_to_irq[row][col] = irq;
+ WRITE_ONCE(evtchn_to_irq[row][col], irq);
return 0;
}
@@ -152,7 +186,7 @@ int get_evtchn_to_irq(evtchn_port_t evtchn)
return -1;
if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
return -1;
- return evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)];
+ return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
}
/* Get info for IRQ */
@@ -261,10 +295,14 @@ static void xen_irq_info_cleanup(struct irq_info *info)
*/
evtchn_port_t evtchn_from_irq(unsigned irq)
{
- if (WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq))
+ const struct irq_info *info = NULL;
+
+ if (likely(irq < nr_irqs))
+ info = info_for_irq(irq);
+ if (!info)
return 0;
- return info_for_irq(irq)->evtchn;
+ return info->evtchn;
}
unsigned int irq_from_evtchn(evtchn_port_t evtchn)
@@ -375,9 +413,157 @@ void notify_remote_via_irq(int irq)
}
EXPORT_SYMBOL_GPL(notify_remote_via_irq);
+struct lateeoi_work {
+ struct delayed_work delayed;
+ spinlock_t eoi_list_lock;
+ struct list_head eoi_list;
+};
+
+static DEFINE_PER_CPU(struct lateeoi_work, lateeoi);
+
+static void lateeoi_list_del(struct irq_info *info)
+{
+ struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
+ unsigned long flags;
+
+ spin_lock_irqsave(&eoi->eoi_list_lock, flags);
+ list_del_init(&info->eoi_list);
+ spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
+}
+
+static void lateeoi_list_add(struct irq_info *info)
+{
+ struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
+ struct irq_info *elem;
+ u64 now = get_jiffies_64();
+ unsigned long delay;
+ unsigned long flags;
+
+ if (now < info->eoi_time)
+ delay = info->eoi_time - now;
+ else
+ delay = 1;
+
+ spin_lock_irqsave(&eoi->eoi_list_lock, flags);
+
+ if (list_empty(&eoi->eoi_list)) {
+ list_add(&info->eoi_list, &eoi->eoi_list);
+ mod_delayed_work_on(info->eoi_cpu, system_wq,
+ &eoi->delayed, delay);
+ } else {
+ list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) {
+ if (elem->eoi_time <= info->eoi_time)
+ break;
+ }
+ list_add(&info->eoi_list, &elem->eoi_list);
+ }
+
+ spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
+}
+
+static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
+{
+ evtchn_port_t evtchn;
+ unsigned int cpu;
+ unsigned int delay = 0;
+
+ evtchn = info->evtchn;
+ if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list))
+ return;
+
+ if (spurious) {
+ if ((1 << info->spurious_cnt) < (HZ << 2))
+ info->spurious_cnt++;
+ if (info->spurious_cnt > 1) {
+ delay = 1 << (info->spurious_cnt - 2);
+ if (delay > HZ)
+ delay = HZ;
+ if (!info->eoi_time)
+ info->eoi_cpu = smp_processor_id();
+ info->eoi_time = get_jiffies_64() + delay;
+ }
+ } else {
+ info->spurious_cnt = 0;
+ }
+
+ cpu = info->eoi_cpu;
+ if (info->eoi_time &&
+ (info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) {
+ lateeoi_list_add(info);
+ return;
+ }
+
+ info->eoi_time = 0;
+ unmask_evtchn(evtchn);
+}
+
+static void xen_irq_lateeoi_worker(struct work_struct *work)
+{
+ struct lateeoi_work *eoi;
+ struct irq_info *info;
+ u64 now = get_jiffies_64();
+ unsigned long flags;
+
+ eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);
+
+ read_lock_irqsave(&evtchn_rwlock, flags);
+
+ while (true) {
+ spin_lock(&eoi->eoi_list_lock);
+
+ info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
+ eoi_list);
+
+ if (info == NULL || now < info->eoi_time) {
+ spin_unlock(&eoi->eoi_list_lock);
+ break;
+ }
+
+ list_del_init(&info->eoi_list);
+
+ spin_unlock(&eoi->eoi_list_lock);
+
+ info->eoi_time = 0;
+
+ xen_irq_lateeoi_locked(info, false);
+ }
+
+ if (info)
+ mod_delayed_work_on(info->eoi_cpu, system_wq,
+ &eoi->delayed, info->eoi_time - now);
+
+ read_unlock_irqrestore(&evtchn_rwlock, flags);
+}
+
+static void xen_cpu_init_eoi(unsigned int cpu)
+{
+ struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu);
+
+ INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker);
+ spin_lock_init(&eoi->eoi_list_lock);
+ INIT_LIST_HEAD(&eoi->eoi_list);
+}
+
+void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
+{
+ struct irq_info *info;
+ unsigned long flags;
+
+ read_lock_irqsave(&evtchn_rwlock, flags);
+
+ info = info_for_irq(irq);
+
+ if (info)
+ xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS);
+
+ read_unlock_irqrestore(&evtchn_rwlock, flags);
+}
+EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
+
static void xen_irq_init(unsigned irq)
{
struct irq_info *info;
+
#ifdef CONFIG_SMP
/* By default all event channels notify CPU#0. */
cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0));
@@ -392,6 +578,7 @@ static void xen_irq_init(unsigned irq)
set_info_for_irq(irq, info);
+ INIT_LIST_HEAD(&info->eoi_list);
list_add_tail(&info->list, &xen_irq_list_head);
}
@@ -440,16 +627,24 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi)
static void xen_free_irq(unsigned irq)
{
struct irq_info *info = info_for_irq(irq);
+ unsigned long flags;
if (WARN_ON(!info))
return;
+ write_lock_irqsave(&evtchn_rwlock, flags);
+
+ if (!list_empty(&info->eoi_list))
+ lateeoi_list_del(info);
+
list_del(&info->list);
set_info_for_irq(irq, NULL);
WARN_ON(info->refcnt > 0);
+ write_unlock_irqrestore(&evtchn_rwlock, flags);
+
kfree(info);
/* Legacy IRQ descriptors are managed by the arch. */
@@ -841,7 +1036,7 @@ int xen_pirq_from_irq(unsigned irq)
}
EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
-int bind_evtchn_to_irq(evtchn_port_t evtchn)
+static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip)
{
int irq;
int ret;
@@ -858,7 +1053,7 @@ int bind_evtchn_to_irq(evtchn_port_t evtchn)
if (irq < 0)
goto out;
- irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
+ irq_set_chip_and_handler_name(irq, chip,
handle_edge_irq, "event");
ret = xen_irq_info_evtchn_setup(irq, evtchn);
@@ -879,8 +1074,19 @@ out:
return irq;
}
+
+int bind_evtchn_to_irq(evtchn_port_t evtchn)
+{
+ return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip);
+}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
+int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn)
+{
+ return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip);
+}
+EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi);
+
static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
{
struct evtchn_bind_ipi bind_ipi;
@@ -922,8 +1128,9 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
return irq;
}
-int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
- evtchn_port_t remote_port)
+static int bind_interdomain_evtchn_to_irq_chip(unsigned int remote_domain,
+ evtchn_port_t remote_port,
+ struct irq_chip *chip)
{
struct evtchn_bind_interdomain bind_interdomain;
int err;
@@ -934,10 +1141,26 @@ int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
&bind_interdomain);
- return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
+ return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port,
+ chip);
+}
+
+int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
+ evtchn_port_t remote_port)
+{
+ return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port,
+ &xen_dynamic_chip);
}
EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq);
+int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain,
+ evtchn_port_t remote_port)
+{
+ return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port,
+ &xen_lateeoi_chip);
+}
+EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
+
static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
{
struct evtchn_status status;
@@ -1034,14 +1257,15 @@ static void unbind_from_irq(unsigned int irq)
mutex_unlock(&irq_mapping_update_lock);
}
-int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
- irq_handler_t handler,
- unsigned long irqflags,
- const char *devname, void *dev_id)
+static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname, void *dev_id,
+ struct irq_chip *chip)
{
int irq, retval;
- irq = bind_evtchn_to_irq(evtchn);
+ irq = bind_evtchn_to_irq_chip(evtchn, chip);
if (irq < 0)
return irq;
retval = request_irq(irq, handler, irqflags, devname, dev_id);
@@ -1052,18 +1276,38 @@ int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
return irq;
}
+
+int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname, void *dev_id)
+{
+ return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
+ devname, dev_id,
+ &xen_dynamic_chip);
+}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
-int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
- evtchn_port_t remote_port,
- irq_handler_t handler,
- unsigned long irqflags,
- const char *devname,
- void *dev_id)
+int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname, void *dev_id)
+{
+ return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
+ devname, dev_id,
+ &xen_lateeoi_chip);
+}
+EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi);
+
+static int bind_interdomain_evtchn_to_irqhandler_chip(
+ unsigned int remote_domain, evtchn_port_t remote_port,
+ irq_handler_t handler, unsigned long irqflags,
+ const char *devname, void *dev_id, struct irq_chip *chip)
{
int irq, retval;
- irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
+ irq = bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port,
+ chip);
if (irq < 0)
return irq;
@@ -1075,8 +1319,33 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
return irq;
}
+
+int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
+ evtchn_port_t remote_port,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id)
+{
+ return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain,
+ remote_port, handler, irqflags, devname,
+ dev_id, &xen_dynamic_chip);
+}
EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler);
+int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain,
+ evtchn_port_t remote_port,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id)
+{
+ return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain,
+ remote_port, handler, irqflags, devname,
+ dev_id, &xen_lateeoi_chip);
+}
+EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi);
+
int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
irq_handler_t handler,
unsigned long irqflags, const char *devname, void *dev_id)
@@ -1189,7 +1458,7 @@ int evtchn_get(evtchn_port_t evtchn)
goto done;
err = -EINVAL;
- if (info->refcnt <= 0)
+ if (info->refcnt <= 0 || info->refcnt == SHRT_MAX)
goto done;
info->refcnt++;
@@ -1228,21 +1497,81 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
notify_remote_via_irq(irq);
}
+struct evtchn_loop_ctrl {
+ ktime_t timeout;
+ unsigned count;
+ bool defer_eoi;
+};
+
+void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
+{
+ int irq;
+ struct irq_info *info;
+
+ irq = get_evtchn_to_irq(port);
+ if (irq == -1)
+ return;
+
+ /*
+ * Check for timeout every 256 events.
+ * We are setting the timeout value only after the first 256
+ * events in order to not hurt the common case of few loop
+ * iterations. The 256 is basically an arbitrary value.
+ *
+ * In case we are hitting the timeout we need to defer all further
+ * EOIs in order to ensure to leave the event handling loop rather
+ * sooner than later.
+ */
+ if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) {
+ ktime_t kt = ktime_get();
+
+ if (!ctrl->timeout) {
+ kt = ktime_add_ms(kt,
+ jiffies_to_msecs(event_loop_timeout));
+ ctrl->timeout = kt;
+ } else if (kt > ctrl->timeout) {
+ ctrl->defer_eoi = true;
+ }
+ }
+
+ info = info_for_irq(irq);
+
+ if (ctrl->defer_eoi) {
+ info->eoi_cpu = smp_processor_id();
+ info->irq_epoch = __this_cpu_read(irq_epoch);
+ info->eoi_time = get_jiffies_64() + event_eoi_delay;
+ }
+
+ generic_handle_irq(irq);
+}
+
static void __xen_evtchn_do_upcall(void)
{
struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
int cpu = smp_processor_id();
+ struct evtchn_loop_ctrl ctrl = { 0 };
+
+ read_lock(&evtchn_rwlock);
do {
vcpu_info->evtchn_upcall_pending = 0;
- xen_evtchn_handle_events(cpu);
+ xen_evtchn_handle_events(cpu, &ctrl);
BUG_ON(!irqs_disabled());
virt_rmb(); /* Hypervisor can set upcall pending. */
} while (vcpu_info->evtchn_upcall_pending);
+
+ read_unlock(&evtchn_rwlock);
+
+ /*
+ * Increment irq_epoch only now to defer EOIs only for
+ * xen_irq_lateeoi() invocations occurring from inside the loop
+ * above.
+ */
+ __this_cpu_inc(irq_epoch);
}
void xen_evtchn_do_upcall(struct pt_regs *regs)
@@ -1606,6 +1935,21 @@ static struct irq_chip xen_dynamic_chip __read_mostly = {
.irq_retrigger = retrigger_dynirq,
};
+static struct irq_chip xen_lateeoi_chip __read_mostly = {
+ /* The chip name needs to contain "xen-dyn" for irqbalance to work. */
+ .name = "xen-dyn-lateeoi",
+
+ .irq_disable = disable_dynirq,
+ .irq_mask = disable_dynirq,
+ .irq_unmask = enable_dynirq,
+
+ .irq_ack = mask_ack_dynirq,
+ .irq_mask_ack = mask_ack_dynirq,
+
+ .irq_set_affinity = set_affinity_irq,
+ .irq_retrigger = retrigger_dynirq,
+};
+
static struct irq_chip xen_pirq_chip __read_mostly = {
.name = "xen-pirq",
@@ -1676,12 +2020,31 @@ void xen_setup_callback_vector(void) {}
static inline void xen_alloc_callback_vector(void) {}
#endif
-#undef MODULE_PARAM_PREFIX
-#define MODULE_PARAM_PREFIX "xen."
-
static bool fifo_events = true;
module_param(fifo_events, bool, 0);
+static int xen_evtchn_cpu_prepare(unsigned int cpu)
+{
+ int ret = 0;
+
+ xen_cpu_init_eoi(cpu);
+
+ if (evtchn_ops->percpu_init)
+ ret = evtchn_ops->percpu_init(cpu);
+
+ return ret;
+}
+
+static int xen_evtchn_cpu_dead(unsigned int cpu)
+{
+ int ret = 0;
+
+ if (evtchn_ops->percpu_deinit)
+ ret = evtchn_ops->percpu_deinit(cpu);
+
+ return ret;
+}
+
void __init xen_init_IRQ(void)
{
int ret = -EINVAL;
@@ -1692,6 +2055,12 @@ void __init xen_init_IRQ(void)
if (ret < 0)
xen_evtchn_2l_init();
+ xen_cpu_init_eoi(smp_processor_id());
+
+ cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
+ "xen/evtchn:prepare",
+ xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
+
evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
sizeof(*evtchn_to_irq), GFP_KERNEL);
BUG_ON(!evtchn_to_irq);
diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c
index c60ee0450173..6085a808da95 100644
--- a/drivers/xen/events/events_fifo.c
+++ b/drivers/xen/events/events_fifo.c
@@ -227,19 +227,25 @@ static bool evtchn_fifo_is_masked(evtchn_port_t port)
return sync_test_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word));
}
/*
- * Clear MASKED, spinning if BUSY is set.
+ * Clear MASKED if not PENDING, spinning if BUSY is set.
+ * Return true if mask was cleared.
*/
-static void clear_masked(volatile event_word_t *word)
+static bool clear_masked_cond(volatile event_word_t *word)
{
event_word_t new, old, w;
w = *word;
do {
+ if (w & (1 << EVTCHN_FIFO_PENDING))
+ return false;
+
old = w & ~(1 << EVTCHN_FIFO_BUSY);
new = old & ~(1 << EVTCHN_FIFO_MASKED);
w = sync_cmpxchg(word, old, new);
} while (w != old);
+
+ return true;
}
static void evtchn_fifo_unmask(evtchn_port_t port)
@@ -248,8 +254,7 @@ static void evtchn_fifo_unmask(evtchn_port_t port)
BUG_ON(!irqs_disabled());
- clear_masked(word);
- if (evtchn_fifo_is_pending(port)) {
+ if (!clear_masked_cond(word)) {
struct evtchn_unmask unmask = { .port = port };
(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
}
@@ -270,19 +275,9 @@ static uint32_t clear_linked(volatile event_word_t *word)
return w & EVTCHN_FIFO_LINK_MASK;
}
-static void handle_irq_for_port(evtchn_port_t port)
-{
- int irq;
-
- irq = get_evtchn_to_irq(port);
- if (irq != -1)
- generic_handle_irq(irq);
-}
-
-static void consume_one_event(unsigned cpu,
+static void consume_one_event(unsigned cpu, struct evtchn_loop_ctrl *ctrl,
struct evtchn_fifo_control_block *control_block,
- unsigned priority, unsigned long *ready,
- bool drop)
+ unsigned priority, unsigned long *ready)
{
struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
uint32_t head;
@@ -315,16 +310,17 @@ static void consume_one_event(unsigned cpu,
clear_bit(priority, ready);
if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) {
- if (unlikely(drop))
+ if (unlikely(!ctrl))
pr_warn("Dropping pending event for port %u\n", port);
else
- handle_irq_for_port(port);
+ handle_irq_for_port(port, ctrl);
}
q->head[priority] = head;
}
-static void __evtchn_fifo_handle_events(unsigned cpu, bool drop)
+static void __evtchn_fifo_handle_events(unsigned cpu,
+ struct evtchn_loop_ctrl *ctrl)
{
struct evtchn_fifo_control_block *control_block;
unsigned long ready;
@@ -336,14 +332,15 @@ static void __evtchn_fifo_handle_events(unsigned cpu, bool drop)
while (ready) {
q = find_first_bit(&ready, EVTCHN_FIFO_MAX_QUEUES);
- consume_one_event(cpu, control_block, q, &ready, drop);
+ consume_one_event(cpu, ctrl, control_block, q, &ready);
ready |= xchg(&control_block->ready, 0);
}
}
-static void evtchn_fifo_handle_events(unsigned cpu)
+static void evtchn_fifo_handle_events(unsigned cpu,
+ struct evtchn_loop_ctrl *ctrl)
{
- __evtchn_fifo_handle_events(cpu, false);
+ __evtchn_fifo_handle_events(cpu, ctrl);
}
static void evtchn_fifo_resume(void)
@@ -380,21 +377,6 @@ static void evtchn_fifo_resume(void)
event_array_pages = 0;
}
-static const struct evtchn_ops evtchn_ops_fifo = {
- .max_channels = evtchn_fifo_max_channels,
- .nr_channels = evtchn_fifo_nr_channels,
- .setup = evtchn_fifo_setup,
- .bind_to_cpu = evtchn_fifo_bind_to_cpu,
- .clear_pending = evtchn_fifo_clear_pending,
- .set_pending = evtchn_fifo_set_pending,
- .is_pending = evtchn_fifo_is_pending,
- .test_and_set_mask = evtchn_fifo_test_and_set_mask,
- .mask = evtchn_fifo_mask,
- .unmask = evtchn_fifo_unmask,
- .handle_events = evtchn_fifo_handle_events,
- .resume = evtchn_fifo_resume,
-};
-
static int evtchn_fifo_alloc_control_block(unsigned cpu)
{
void *control_block = NULL;
@@ -417,19 +399,36 @@ static int evtchn_fifo_alloc_control_block(unsigned cpu)
return ret;
}
-static int xen_evtchn_cpu_prepare(unsigned int cpu)
+static int evtchn_fifo_percpu_init(unsigned int cpu)
{
if (!per_cpu(cpu_control_block, cpu))
return evtchn_fifo_alloc_control_block(cpu);
return 0;
}
-static int xen_evtchn_cpu_dead(unsigned int cpu)
+static int evtchn_fifo_percpu_deinit(unsigned int cpu)
{
- __evtchn_fifo_handle_events(cpu, true);
+ __evtchn_fifo_handle_events(cpu, NULL);
return 0;
}
+static const struct evtchn_ops evtchn_ops_fifo = {
+ .max_channels = evtchn_fifo_max_channels,
+ .nr_channels = evtchn_fifo_nr_channels,
+ .setup = evtchn_fifo_setup,
+ .bind_to_cpu = evtchn_fifo_bind_to_cpu,
+ .clear_pending = evtchn_fifo_clear_pending,
+ .set_pending = evtchn_fifo_set_pending,
+ .is_pending = evtchn_fifo_is_pending,
+ .test_and_set_mask = evtchn_fifo_test_and_set_mask,
+ .mask = evtchn_fifo_mask,
+ .unmask = evtchn_fifo_unmask,
+ .handle_events = evtchn_fifo_handle_events,
+ .resume = evtchn_fifo_resume,
+ .percpu_init = evtchn_fifo_percpu_init,
+ .percpu_deinit = evtchn_fifo_percpu_deinit,
+};
+
int __init xen_evtchn_fifo_init(void)
{
int cpu = smp_processor_id();
@@ -443,9 +442,5 @@ int __init xen_evtchn_fifo_init(void)
evtchn_ops = &evtchn_ops_fifo;
- cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
- "xen/evtchn:prepare",
- xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
-
return ret;
}
diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h
index 10684feb094e..82937d90d7d7 100644
--- a/drivers/xen/events/events_internal.h
+++ b/drivers/xen/events/events_internal.h
@@ -30,11 +30,16 @@ enum xen_irq_type {
*/
struct irq_info {
struct list_head list;
- int refcnt;
+ struct list_head eoi_list;
+ short refcnt;
+ short spurious_cnt;
enum xen_irq_type type; /* type */
unsigned irq;
evtchn_port_t evtchn; /* event channel */
unsigned short cpu; /* cpu bound */
+ unsigned short eoi_cpu; /* EOI must happen on this cpu */
+ unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
+ u64 eoi_time; /* Time in jiffies when to EOI. */
union {
unsigned short virq;
@@ -53,6 +58,8 @@ struct irq_info {
#define PIRQ_SHAREABLE (1 << 1)
#define PIRQ_MSI_GROUP (1 << 2)
+struct evtchn_loop_ctrl;
+
struct evtchn_ops {
unsigned (*max_channels)(void);
unsigned (*nr_channels)(void);
@@ -67,14 +74,18 @@ struct evtchn_ops {
void (*mask)(evtchn_port_t port);
void (*unmask)(evtchn_port_t port);
- void (*handle_events)(unsigned cpu);
+ void (*handle_events)(unsigned cpu, struct evtchn_loop_ctrl *ctrl);
void (*resume)(void);
+
+ int (*percpu_init)(unsigned int cpu);
+ int (*percpu_deinit)(unsigned int cpu);
};
extern const struct evtchn_ops *evtchn_ops;
extern int **evtchn_to_irq;
int get_evtchn_to_irq(evtchn_port_t evtchn);
+void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl);
struct irq_info *info_for_irq(unsigned irq);
unsigned cpu_from_irq(unsigned irq);
@@ -132,9 +143,10 @@ static inline void unmask_evtchn(evtchn_port_t port)
return evtchn_ops->unmask(port);
}
-static inline void xen_evtchn_handle_events(unsigned cpu)
+static inline void xen_evtchn_handle_events(unsigned cpu,
+ struct evtchn_loop_ctrl *ctrl)
{
- return evtchn_ops->handle_events(cpu);
+ return evtchn_ops->handle_events(cpu, ctrl);
}
static inline void xen_evtchn_resume(void)
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 6e0b1dd5573c..5dc016d68f83 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -167,7 +167,6 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
"Interrupt for port %u, but apparently not enabled; per-user %p\n",
evtchn->port, u);
- disable_irq_nosync(irq);
evtchn->enabled = false;
spin_lock(&u->ring_prod_lock);
@@ -293,7 +292,7 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
evtchn = find_evtchn(u, port);
if (evtchn && !evtchn->enabled) {
evtchn->enabled = true;
- enable_irq(irq_from_evtchn(port));
+ xen_irq_lateeoi(irq_from_evtchn(port), 0);
}
}
@@ -393,8 +392,8 @@ static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port)
if (rc < 0)
goto err;
- rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0,
- u->name, evtchn);
+ rc = bind_evtchn_to_irqhandler_lateeoi(port, evtchn_interrupt, 0,
+ u->name, evtchn);
if (rc < 0)
goto err;
diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
index 9eae1fceec1e..a7d293fa8d14 100644
--- a/drivers/xen/pvcalls-back.c
+++ b/drivers/xen/pvcalls-back.c
@@ -66,6 +66,7 @@ struct sock_mapping {
atomic_t write;
atomic_t io;
atomic_t release;
+ atomic_t eoi;
void (*saved_data_ready)(struct sock *sk);
struct pvcalls_ioworker ioworker;
};
@@ -87,7 +88,7 @@ static int pvcalls_back_release_active(struct xenbus_device *dev,
struct pvcalls_fedata *fedata,
struct sock_mapping *map);
-static void pvcalls_conn_back_read(void *opaque)
+static bool pvcalls_conn_back_read(void *opaque)
{
struct sock_mapping *map = (struct sock_mapping *)opaque;
struct msghdr msg;
@@ -107,17 +108,17 @@ static void pvcalls_conn_back_read(void *opaque)
virt_mb();
if (error)
- return;
+ return false;
size = pvcalls_queued(prod, cons, array_size);
if (size >= array_size)
- return;
+ return false;
spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
if (skb_queue_empty(&map->sock->sk->sk_receive_queue)) {
atomic_set(&map->read, 0);
spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock,
flags);
- return;
+ return true;
}
spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags);
wanted = array_size - size;
@@ -141,7 +142,7 @@ static void pvcalls_conn_back_read(void *opaque)
ret = inet_recvmsg(map->sock, &msg, wanted, MSG_DONTWAIT);
WARN_ON(ret > wanted);
if (ret == -EAGAIN) /* shouldn't happen */
- return;
+ return true;
if (!ret)
ret = -ENOTCONN;
spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
@@ -160,10 +161,10 @@ static void pvcalls_conn_back_read(void *opaque)
virt_wmb();
notify_remote_via_irq(map->irq);
- return;
+ return true;
}
-static void pvcalls_conn_back_write(struct sock_mapping *map)
+static bool pvcalls_conn_back_write(struct sock_mapping *map)
{
struct pvcalls_data_intf *intf = map->ring;
struct pvcalls_data *data = &map->data;
@@ -180,7 +181,7 @@ static void pvcalls_conn_back_write(struct sock_mapping *map)
array_size = XEN_FLEX_RING_SIZE(map->ring_order);
size = pvcalls_queued(prod, cons, array_size);
if (size == 0)
- return;
+ return false;
memset(&msg, 0, sizeof(msg));
msg.msg_flags |= MSG_DONTWAIT;
@@ -198,12 +199,11 @@ static void pvcalls_conn_back_write(struct sock_mapping *map)
atomic_set(&map->write, 0);
ret = inet_sendmsg(map->sock, &msg, size);
- if (ret == -EAGAIN || (ret >= 0 && ret < size)) {
+ if (ret == -EAGAIN) {
atomic_inc(&map->write);
atomic_inc(&map->io);
+ return true;
}
- if (ret == -EAGAIN)
- return;
/* write the data, then update the indexes */
virt_wmb();
@@ -216,9 +216,13 @@ static void pvcalls_conn_back_write(struct sock_mapping *map)
}
/* update the indexes, then notify the other end */
virt_wmb();
- if (prod != cons + ret)
+ if (prod != cons + ret) {
atomic_inc(&map->write);
+ atomic_inc(&map->io);
+ }
notify_remote_via_irq(map->irq);
+
+ return true;
}
static void pvcalls_back_ioworker(struct work_struct *work)
@@ -227,6 +231,7 @@ static void pvcalls_back_ioworker(struct work_struct *work)
struct pvcalls_ioworker, register_work);
struct sock_mapping *map = container_of(ioworker, struct sock_mapping,
ioworker);
+ unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
while (atomic_read(&map->io) > 0) {
if (atomic_read(&map->release) > 0) {
@@ -234,10 +239,18 @@ static void pvcalls_back_ioworker(struct work_struct *work)
return;
}
- if (atomic_read(&map->read) > 0)
- pvcalls_conn_back_read(map);
- if (atomic_read(&map->write) > 0)
- pvcalls_conn_back_write(map);
+ if (atomic_read(&map->read) > 0 &&
+ pvcalls_conn_back_read(map))
+ eoi_flags = 0;
+ if (atomic_read(&map->write) > 0 &&
+ pvcalls_conn_back_write(map))
+ eoi_flags = 0;
+
+ if (atomic_read(&map->eoi) > 0 && !atomic_read(&map->write)) {
+ atomic_set(&map->eoi, 0);
+ xen_irq_lateeoi(map->irq, eoi_flags);
+ eoi_flags = XEN_EOI_FLAG_SPURIOUS;
+ }
atomic_dec(&map->io);
}
@@ -334,12 +347,9 @@ static struct sock_mapping *pvcalls_new_active_socket(
goto out;
map->bytes = page;
- ret = bind_interdomain_evtchn_to_irqhandler(fedata->dev->otherend_id,
- evtchn,
- pvcalls_back_conn_event,
- 0,
- "pvcalls-backend",
- map);
+ ret = bind_interdomain_evtchn_to_irqhandler_lateeoi(
+ fedata->dev->otherend_id, evtchn,
+ pvcalls_back_conn_event, 0, "pvcalls-backend", map);
if (ret < 0)
goto out;
map->irq = ret;
@@ -873,15 +883,18 @@ static irqreturn_t pvcalls_back_event(int irq, void *dev_id)
{
struct xenbus_device *dev = dev_id;
struct pvcalls_fedata *fedata = NULL;
+ unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
- if (dev == NULL)
- return IRQ_HANDLED;
+ if (dev) {
+ fedata = dev_get_drvdata(&dev->dev);
+ if (fedata) {
+ pvcalls_back_work(fedata);
+ eoi_flags = 0;
+ }
+ }
- fedata = dev_get_drvdata(&dev->dev);
- if (fedata == NULL)
- return IRQ_HANDLED;
+ xen_irq_lateeoi(irq, eoi_flags);
- pvcalls_back_work(fedata);
return IRQ_HANDLED;
}
@@ -891,12 +904,15 @@ static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map)
struct pvcalls_ioworker *iow;
if (map == NULL || map->sock == NULL || map->sock->sk == NULL ||
- map->sock->sk->sk_user_data != map)
+ map->sock->sk->sk_user_data != map) {
+ xen_irq_lateeoi(irq, 0);
return IRQ_HANDLED;
+ }
iow = &map->ioworker;
atomic_inc(&map->write);
+ atomic_inc(&map->eoi);
atomic_inc(&map->io);
queue_work(iow->wq, &iow->register_work);
@@ -932,7 +948,7 @@ static int backend_connect(struct xenbus_device *dev)
goto error;
}
- err = bind_interdomain_evtchn_to_irq(dev->otherend_id, evtchn);
+ err = bind_interdomain_evtchn_to_irq_lateeoi(dev->otherend_id, evtchn);
if (err < 0)
goto error;
fedata->irq = err;
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
index e876c3d6dad1..cb904ac83006 100644
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -734,10 +734,17 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
wmb();
notify_remote_via_irq(pdev->evtchn_irq);
+ /* Enable IRQ to signal "request done". */
+ xen_pcibk_lateeoi(pdev, 0);
+
ret = wait_event_timeout(xen_pcibk_aer_wait_queue,
!(test_bit(_XEN_PCIB_active, (unsigned long *)
&sh_info->flags)), 300*HZ);
+ /* Enable IRQ for pcifront request if not already active. */
+ if (!test_bit(_PDEVF_op_active, &pdev->flags))
+ xen_pcibk_lateeoi(pdev, 0);
+
if (!ret) {
if (test_bit(_XEN_PCIB_active,
(unsigned long *)&sh_info->flags)) {
@@ -751,12 +758,6 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
}
clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags);
- if (test_bit(_XEN_PCIF_active,
- (unsigned long *)&sh_info->flags)) {
- dev_dbg(&psdev->dev->dev, "schedule pci_conf service\n");
- xen_pcibk_test_and_schedule_op(psdev->pdev);
- }
-
res = (pci_ers_result_t)aer_op->err;
return res;
}
diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h
index f1ed2dbf685c..95e28ee48d52 100644
--- a/drivers/xen/xen-pciback/pciback.h
+++ b/drivers/xen/xen-pciback/pciback.h
@@ -14,6 +14,7 @@
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/atomic.h>
+#include <xen/events.h>
#include <xen/interface/io/pciif.h>
#define DRV_NAME "xen-pciback"
@@ -27,6 +28,8 @@ struct pci_dev_entry {
#define PDEVF_op_active (1<<(_PDEVF_op_active))
#define _PCIB_op_pending (1)
#define PCIB_op_pending (1<<(_PCIB_op_pending))
+#define _EOI_pending (2)
+#define EOI_pending (1<<(_EOI_pending))
struct xen_pcibk_device {
void *pci_dev_data;
@@ -183,10 +186,15 @@ static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id);
void xen_pcibk_do_op(struct work_struct *data);
+static inline void xen_pcibk_lateeoi(struct xen_pcibk_device *pdev,
+ unsigned int eoi_flag)
+{
+ if (test_and_clear_bit(_EOI_pending, &pdev->flags))
+ xen_irq_lateeoi(pdev->evtchn_irq, eoi_flag);
+}
+
int xen_pcibk_xenbus_register(void);
void xen_pcibk_xenbus_unregister(void);
-
-void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev);
#endif
/* Handles shared IRQs that can to device domain and control domain. */
diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
index e11a7438e1a2..3fbc21466a93 100644
--- a/drivers/xen/xen-pciback/pciback_ops.c
+++ b/drivers/xen/xen-pciback/pciback_ops.c
@@ -276,26 +276,41 @@ int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev,
return 0;
}
#endif
+
+static inline bool xen_pcibk_test_op_pending(struct xen_pcibk_device *pdev)
+{
+ return test_bit(_XEN_PCIF_active,
+ (unsigned long *)&pdev->sh_info->flags) &&
+ !test_and_set_bit(_PDEVF_op_active, &pdev->flags);
+}
+
/*
* Now the same evtchn is used for both pcifront conf_read_write request
* as well as pcie aer front end ack. We use a new work_queue to schedule
* xen_pcibk conf_read_write service for avoiding confict with aer_core
* do_recovery job which also use the system default work_queue
*/
-void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
+static void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
{
+ bool eoi = true;
+
/* Check that frontend is requesting an operation and that we are not
* already processing a request */
- if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
- && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) {
+ if (xen_pcibk_test_op_pending(pdev)) {
schedule_work(&pdev->op_work);
+ eoi = false;
}
/*_XEN_PCIB_active should have been cleared by pcifront. And also make
sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/
if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
&& test_bit(_PCIB_op_pending, &pdev->flags)) {
wake_up(&xen_pcibk_aer_wait_queue);
+ eoi = false;
}
+
+ /* EOI if there was nothing to do. */
+ if (eoi)
+ xen_pcibk_lateeoi(pdev, XEN_EOI_FLAG_SPURIOUS);
}
/* Performing the configuration space reads/writes must not be done in atomic
@@ -303,10 +318,8 @@ void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
* use of semaphores). This function is intended to be called from a work
* queue in process context taking a struct xen_pcibk_device as a parameter */
-void xen_pcibk_do_op(struct work_struct *data)
+static void xen_pcibk_do_one_op(struct xen_pcibk_device *pdev)
{
- struct xen_pcibk_device *pdev =
- container_of(data, struct xen_pcibk_device, op_work);
struct pci_dev *dev;
struct xen_pcibk_dev_data *dev_data = NULL;
struct xen_pci_op *op = &pdev->op;
@@ -379,16 +392,31 @@ void xen_pcibk_do_op(struct work_struct *data)
smp_mb__before_atomic(); /* /after/ clearing PCIF_active */
clear_bit(_PDEVF_op_active, &pdev->flags);
smp_mb__after_atomic(); /* /before/ final check for work */
+}
- /* Check to see if the driver domain tried to start another request in
- * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
- */
- xen_pcibk_test_and_schedule_op(pdev);
+void xen_pcibk_do_op(struct work_struct *data)
+{
+ struct xen_pcibk_device *pdev =
+ container_of(data, struct xen_pcibk_device, op_work);
+
+ do {
+ xen_pcibk_do_one_op(pdev);
+ } while (xen_pcibk_test_op_pending(pdev));
+
+ xen_pcibk_lateeoi(pdev, 0);
}
irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id)
{
struct xen_pcibk_device *pdev = dev_id;
+ bool eoi;
+
+ /* IRQs might come in before pdev->evtchn_irq is written. */
+ if (unlikely(pdev->evtchn_irq != irq))
+ pdev->evtchn_irq = irq;
+
+ eoi = test_and_set_bit(_EOI_pending, &pdev->flags);
+ WARN(eoi, "IRQ while EOI pending\n");
xen_pcibk_test_and_schedule_op(pdev);
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index b500466a6c37..4b99ec3dec58 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -123,7 +123,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
pdev->sh_info = vaddr;
- err = bind_interdomain_evtchn_to_irqhandler(
+ err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event,
0, DRV_NAME, pdev);
if (err < 0) {
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index 1e8cfd80a4e6..4acc4e899600 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -91,7 +91,6 @@ struct vscsibk_info {
unsigned int irq;
struct vscsiif_back_ring ring;
- int ring_error;
spinlock_t ring_lock;
atomic_t nr_unreplied_reqs;
@@ -722,7 +721,8 @@ static struct vscsibk_pend *prepare_pending_reqs(struct vscsibk_info *info,
return pending_req;
}
-static int scsiback_do_cmd_fn(struct vscsibk_info *info)
+static int scsiback_do_cmd_fn(struct vscsibk_info *info,
+ unsigned int *eoi_flags)
{
struct vscsiif_back_ring *ring = &info->ring;
struct vscsiif_request ring_req;
@@ -739,11 +739,12 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info)
rc = ring->rsp_prod_pvt;
pr_warn("Dom%d provided bogus ring requests (%#x - %#x = %u). Halting ring processing\n",
info->domid, rp, rc, rp - rc);
- info->ring_error = 1;
- return 0;
+ return -EINVAL;
}
while ((rc != rp)) {
+ *eoi_flags &= ~XEN_EOI_FLAG_SPURIOUS;
+
if (RING_REQUEST_CONS_OVERFLOW(ring, rc))
break;
@@ -802,13 +803,16 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info)
static irqreturn_t scsiback_irq_fn(int irq, void *dev_id)
{
struct vscsibk_info *info = dev_id;
+ int rc;
+ unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
- if (info->ring_error)
- return IRQ_HANDLED;
-
- while (scsiback_do_cmd_fn(info))
+ while ((rc = scsiback_do_cmd_fn(info, &eoi_flags)) > 0)
cond_resched();
+ /* In case of a ring error we keep the event channel masked. */
+ if (!rc)
+ xen_irq_lateeoi(irq, eoi_flags);
+
return IRQ_HANDLED;
}
@@ -829,7 +833,7 @@ static int scsiback_init_sring(struct vscsibk_info *info, grant_ref_t ring_ref,
sring = (struct vscsiif_sring *)area;
BACK_RING_INIT(&info->ring, sring, PAGE_SIZE);
- err = bind_interdomain_evtchn_to_irq(info->domid, evtchn);
+ err = bind_interdomain_evtchn_to_irq_lateeoi(info->domid, evtchn);
if (err < 0)
goto unmap_page;
@@ -1253,7 +1257,6 @@ static int scsiback_probe(struct xenbus_device *dev,
info->domid = dev->otherend_id;
spin_lock_init(&info->ring_lock);
- info->ring_error = 0;
atomic_set(&info->nr_unreplied_reqs, 0);
init_waitqueue_head(&info->waiting_to_free);
info->dev = dev;
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 2690318ad50f..fd80e318b99c 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -73,16 +73,13 @@ struct map_ring_valloc {
struct xenbus_map_node *node;
/* Why do we need two arrays? See comment of __xenbus_map_ring */
- union {
- unsigned long addrs[XENBUS_MAX_RING_GRANTS];
- pte_t *ptes[XENBUS_MAX_RING_GRANTS];
- };
+ unsigned long addrs[XENBUS_MAX_RING_GRANTS];
phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];
struct gnttab_map_grant_ref map[XENBUS_MAX_RING_GRANTS];
struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
- unsigned int idx; /* HVM only. */
+ unsigned int idx;
};
static DEFINE_SPINLOCK(xenbus_valloc_lock);
@@ -686,6 +683,14 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
#ifdef CONFIG_XEN_PV
+static int map_ring_apply(pte_t *pte, unsigned long addr, void *data)
+{
+ struct map_ring_valloc *info = data;
+
+ info->phys_addrs[info->idx++] = arbitrary_virt_to_machine(pte).maddr;
+ return 0;
+}
+
static int xenbus_map_ring_pv(struct xenbus_device *dev,
struct map_ring_valloc *info,
grant_ref_t *gnt_refs,
@@ -694,18 +699,15 @@ static int xenbus_map_ring_pv(struct xenbus_device *dev,
{
struct xenbus_map_node *node = info->node;
struct vm_struct *area;
- int err = GNTST_okay;
- int i;
- bool leaked;
+ bool leaked = false;
+ int err = -ENOMEM;
- area = alloc_vm_area(XEN_PAGE_SIZE * nr_grefs, info->ptes);
+ area = get_vm_area(XEN_PAGE_SIZE * nr_grefs, VM_IOREMAP);
if (!area)
return -ENOMEM;
-
- for (i = 0; i < nr_grefs; i++)
- info->phys_addrs[i] =
- arbitrary_virt_to_machine(info->ptes[i]).maddr;
-
+ if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
+ XEN_PAGE_SIZE * nr_grefs, map_ring_apply, info))
+ goto failed;
err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles,
info, GNTMAP_host_map | GNTMAP_contains_pte,
&leaked);
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 5b79cdceefa0..52233fa6195f 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -18,8 +18,10 @@
static unsigned __read_mostly afs_cell_gc_delay = 10;
static unsigned __read_mostly afs_cell_min_ttl = 10 * 60;
static unsigned __read_mostly afs_cell_max_ttl = 24 * 60 * 60;
+static atomic_t cell_debug_id;
-static void afs_manage_cell(struct work_struct *);
+static void afs_queue_cell_manager(struct afs_net *);
+static void afs_manage_cell_work(struct work_struct *);
static void afs_dec_cells_outstanding(struct afs_net *net)
{
@@ -37,19 +39,22 @@ static void afs_set_cell_timer(struct afs_net *net, time64_t delay)
atomic_inc(&net->cells_outstanding);
if (timer_reduce(&net->cells_timer, jiffies + delay * HZ))
afs_dec_cells_outstanding(net);
+ } else {
+ afs_queue_cell_manager(net);
}
}
/*
- * Look up and get an activation reference on a cell record under RCU
- * conditions. The caller must hold the RCU read lock.
+ * Look up and get an activation reference on a cell record. The caller must
+ * hold net->cells_lock at least read-locked.
*/
-struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net,
- const char *name, unsigned int namesz)
+static struct afs_cell *afs_find_cell_locked(struct afs_net *net,
+ const char *name, unsigned int namesz,
+ enum afs_cell_trace reason)
{
struct afs_cell *cell = NULL;
struct rb_node *p;
- int n, seq = 0, ret = 0;
+ int n;
_enter("%*.*s", namesz, namesz, name);
@@ -58,61 +63,48 @@ struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net,
if (namesz > AFS_MAXCELLNAME)
return ERR_PTR(-ENAMETOOLONG);
- do {
- /* Unfortunately, rbtree walking doesn't give reliable results
- * under just the RCU read lock, so we have to check for
- * changes.
- */
- if (cell)
- afs_put_cell(net, cell);
- cell = NULL;
- ret = -ENOENT;
-
- read_seqbegin_or_lock(&net->cells_lock, &seq);
-
- if (!name) {
- cell = rcu_dereference_raw(net->ws_cell);
- if (cell) {
- afs_get_cell(cell);
- ret = 0;
- break;
- }
- ret = -EDESTADDRREQ;
- continue;
- }
+ if (!name) {
+ cell = net->ws_cell;
+ if (!cell)
+ return ERR_PTR(-EDESTADDRREQ);
+ goto found;
+ }
- p = rcu_dereference_raw(net->cells.rb_node);
- while (p) {
- cell = rb_entry(p, struct afs_cell, net_node);
-
- n = strncasecmp(cell->name, name,
- min_t(size_t, cell->name_len, namesz));
- if (n == 0)
- n = cell->name_len - namesz;
- if (n < 0) {
- p = rcu_dereference_raw(p->rb_left);
- } else if (n > 0) {
- p = rcu_dereference_raw(p->rb_right);
- } else {
- if (atomic_inc_not_zero(&cell->usage)) {
- ret = 0;
- break;
- }
- /* We want to repeat the search, this time with
- * the lock properly locked.
- */
- }
- cell = NULL;
- }
+ p = net->cells.rb_node;
+ while (p) {
+ cell = rb_entry(p, struct afs_cell, net_node);
- } while (need_seqretry(&net->cells_lock, seq));
+ n = strncasecmp(cell->name, name,
+ min_t(size_t, cell->name_len, namesz));
+ if (n == 0)
+ n = cell->name_len - namesz;
+ if (n < 0)
+ p = p->rb_left;
+ else if (n > 0)
+ p = p->rb_right;
+ else
+ goto found;
+ }
- done_seqretry(&net->cells_lock, seq);
+ return ERR_PTR(-ENOENT);
- if (ret != 0 && cell)
- afs_put_cell(net, cell);
+found:
+ return afs_use_cell(cell, reason);
+}
- return ret == 0 ? cell : ERR_PTR(ret);
+/*
+ * Look up and get an activation reference on a cell record.
+ */
+struct afs_cell *afs_find_cell(struct afs_net *net,
+ const char *name, unsigned int namesz,
+ enum afs_cell_trace reason)
+{
+ struct afs_cell *cell;
+
+ down_read(&net->cells_lock);
+ cell = afs_find_cell_locked(net, name, namesz, reason);
+ up_read(&net->cells_lock);
+ return cell;
}
/*
@@ -166,8 +158,9 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
cell->name[i] = tolower(name[i]);
cell->name[i] = 0;
- atomic_set(&cell->usage, 2);
- INIT_WORK(&cell->manager, afs_manage_cell);
+ atomic_set(&cell->ref, 1);
+ atomic_set(&cell->active, 0);
+ INIT_WORK(&cell->manager, afs_manage_cell_work);
cell->volumes = RB_ROOT;
INIT_HLIST_HEAD(&cell->proc_volumes);
seqlock_init(&cell->volume_lock);
@@ -206,6 +199,9 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
cell->dns_source = vllist->source;
cell->dns_status = vllist->status;
smp_store_release(&cell->dns_lookup_count, 1); /* vs source/status */
+ atomic_inc(&net->cells_outstanding);
+ cell->debug_id = atomic_inc_return(&cell_debug_id);
+ trace_afs_cell(cell->debug_id, 1, 0, afs_cell_trace_alloc);
_leave(" = %p", cell);
return cell;
@@ -245,9 +241,7 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net,
_enter("%s,%s", name, vllist);
if (!excl) {
- rcu_read_lock();
- cell = afs_lookup_cell_rcu(net, name, namesz);
- rcu_read_unlock();
+ cell = afs_find_cell(net, name, namesz, afs_cell_trace_use_lookup);
if (!IS_ERR(cell))
goto wait_for_cell;
}
@@ -268,7 +262,7 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net,
/* Find the insertion point and check to see if someone else added a
* cell whilst we were allocating.
*/
- write_seqlock(&net->cells_lock);
+ down_write(&net->cells_lock);
pp = &net->cells.rb_node;
parent = NULL;
@@ -290,23 +284,26 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net,
cell = candidate;
candidate = NULL;
+ atomic_set(&cell->active, 2);
+ trace_afs_cell(cell->debug_id, atomic_read(&cell->ref), 2, afs_cell_trace_insert);
rb_link_node_rcu(&cell->net_node, parent, pp);
rb_insert_color(&cell->net_node, &net->cells);
- atomic_inc(&net->cells_outstanding);
- write_sequnlock(&net->cells_lock);
+ up_write(&net->cells_lock);
- queue_work(afs_wq, &cell->manager);
+ afs_queue_cell(cell, afs_cell_trace_get_queue_new);
wait_for_cell:
+ trace_afs_cell(cell->debug_id, atomic_read(&cell->ref), atomic_read(&cell->active),
+ afs_cell_trace_wait);
_debug("wait_for_cell");
wait_var_event(&cell->state,
({
state = smp_load_acquire(&cell->state); /* vs error */
- state == AFS_CELL_ACTIVE || state == AFS_CELL_FAILED;
+ state == AFS_CELL_ACTIVE || state == AFS_CELL_REMOVED;
}));
/* Check the state obtained from the wait check. */
- if (state == AFS_CELL_FAILED) {
+ if (state == AFS_CELL_REMOVED) {
ret = cell->error;
goto error;
}
@@ -320,16 +317,17 @@ cell_already_exists:
if (excl) {
ret = -EEXIST;
} else {
- afs_get_cell(cursor);
+ afs_use_cell(cursor, afs_cell_trace_use_lookup);
ret = 0;
}
- write_sequnlock(&net->cells_lock);
- kfree(candidate);
+ up_write(&net->cells_lock);
+ if (candidate)
+ afs_put_cell(candidate, afs_cell_trace_put_candidate);
if (ret == 0)
goto wait_for_cell;
goto error_noput;
error:
- afs_put_cell(net, cell);
+ afs_unuse_cell(net, cell, afs_cell_trace_unuse_lookup);
error_noput:
_leave(" = %d [error]", ret);
return ERR_PTR(ret);
@@ -374,15 +372,16 @@ int afs_cell_init(struct afs_net *net, const char *rootcell)
}
if (!test_and_set_bit(AFS_CELL_FL_NO_GC, &new_root->flags))
- afs_get_cell(new_root);
+ afs_use_cell(new_root, afs_cell_trace_use_pin);
/* install the new cell */
- write_seqlock(&net->cells_lock);
- old_root = rcu_access_pointer(net->ws_cell);
- rcu_assign_pointer(net->ws_cell, new_root);
- write_sequnlock(&net->cells_lock);
+ down_write(&net->cells_lock);
+ afs_see_cell(new_root, afs_cell_trace_see_ws);
+ old_root = net->ws_cell;
+ net->ws_cell = new_root;
+ up_write(&net->cells_lock);
- afs_put_cell(net, old_root);
+ afs_unuse_cell(net, old_root, afs_cell_trace_unuse_ws);
_leave(" = 0");
return 0;
}
@@ -488,18 +487,22 @@ out_wake:
static void afs_cell_destroy(struct rcu_head *rcu)
{
struct afs_cell *cell = container_of(rcu, struct afs_cell, rcu);
+ struct afs_net *net = cell->net;
+ int u;
_enter("%p{%s}", cell, cell->name);
- ASSERTCMP(atomic_read(&cell->usage), ==, 0);
+ u = atomic_read(&cell->ref);
+ ASSERTCMP(u, ==, 0);
+ trace_afs_cell(cell->debug_id, u, atomic_read(&cell->active), afs_cell_trace_free);
- afs_put_volume(cell->net, cell->root_volume, afs_volume_trace_put_cell_root);
- afs_put_vlserverlist(cell->net, rcu_access_pointer(cell->vl_servers));
- afs_put_cell(cell->net, cell->alias_of);
+ afs_put_vlserverlist(net, rcu_access_pointer(cell->vl_servers));
+ afs_unuse_cell(net, cell->alias_of, afs_cell_trace_unuse_alias);
key_put(cell->anonymous_key);
kfree(cell->name);
kfree(cell);
+ afs_dec_cells_outstanding(net);
_leave(" [destroyed]");
}
@@ -532,18 +535,63 @@ void afs_cells_timer(struct timer_list *timer)
/*
* Get a reference on a cell record.
*/
-struct afs_cell *afs_get_cell(struct afs_cell *cell)
+struct afs_cell *afs_get_cell(struct afs_cell *cell, enum afs_cell_trace reason)
{
- atomic_inc(&cell->usage);
+ int u;
+
+ if (atomic_read(&cell->ref) <= 0)
+ BUG();
+
+ u = atomic_inc_return(&cell->ref);
+ trace_afs_cell(cell->debug_id, u, atomic_read(&cell->active), reason);
return cell;
}
/*
* Drop a reference on a cell record.
*/
-void afs_put_cell(struct afs_net *net, struct afs_cell *cell)
+void afs_put_cell(struct afs_cell *cell, enum afs_cell_trace reason)
{
+ if (cell) {
+ unsigned int debug_id = cell->debug_id;
+ unsigned int u, a;
+
+ a = atomic_read(&cell->active);
+ u = atomic_dec_return(&cell->ref);
+ trace_afs_cell(debug_id, u, a, reason);
+ if (u == 0) {
+ a = atomic_read(&cell->active);
+ WARN(a != 0, "Cell active count %u > 0\n", a);
+ call_rcu(&cell->rcu, afs_cell_destroy);
+ }
+ }
+}
+
+/*
+ * Note a cell becoming more active.
+ */
+struct afs_cell *afs_use_cell(struct afs_cell *cell, enum afs_cell_trace reason)
+{
+ int u, a;
+
+ if (atomic_read(&cell->ref) <= 0)
+ BUG();
+
+ u = atomic_read(&cell->ref);
+ a = atomic_inc_return(&cell->active);
+ trace_afs_cell(cell->debug_id, u, a, reason);
+ return cell;
+}
+
+/*
+ * Record a cell becoming less active. When the active counter reaches 1, it
+ * is scheduled for destruction, but may get reactivated.
+ */
+void afs_unuse_cell(struct afs_net *net, struct afs_cell *cell, enum afs_cell_trace reason)
+{
+ unsigned int debug_id = cell->debug_id;
time64_t now, expire_delay;
+ int u, a;
if (!cell)
return;
@@ -556,11 +604,35 @@ void afs_put_cell(struct afs_net *net, struct afs_cell *cell)
if (cell->vl_servers->nr_servers)
expire_delay = afs_cell_gc_delay;
- if (atomic_dec_return(&cell->usage) > 1)
- return;
+ u = atomic_read(&cell->ref);
+ a = atomic_dec_return(&cell->active);
+ trace_afs_cell(debug_id, u, a, reason);
+ WARN_ON(a == 0);
+ if (a == 1)
+ /* 'cell' may now be garbage collected. */
+ afs_set_cell_timer(net, expire_delay);
+}
+
+/*
+ * Note that a cell has been seen.
+ */
+void afs_see_cell(struct afs_cell *cell, enum afs_cell_trace reason)
+{
+ int u, a;
+
+ u = atomic_read(&cell->ref);
+ a = atomic_read(&cell->active);
+ trace_afs_cell(cell->debug_id, u, a, reason);
+}
- /* 'cell' may now be garbage collected. */
- afs_set_cell_timer(net, expire_delay);
+/*
+ * Queue a cell for management, giving the workqueue a ref to hold.
+ */
+void afs_queue_cell(struct afs_cell *cell, enum afs_cell_trace reason)
+{
+ afs_get_cell(cell, reason);
+ if (!queue_work(afs_wq, &cell->manager))
+ afs_put_cell(cell, afs_cell_trace_put_queue_fail);
}
/*
@@ -660,12 +732,10 @@ static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell)
* Manage a cell record, initialising and destroying it, maintaining its DNS
* records.
*/
-static void afs_manage_cell(struct work_struct *work)
+static void afs_manage_cell(struct afs_cell *cell)
{
- struct afs_cell *cell = container_of(work, struct afs_cell, manager);
struct afs_net *net = cell->net;
- bool deleted;
- int ret, usage;
+ int ret, active;
_enter("%s", cell->name);
@@ -674,14 +744,19 @@ again:
switch (cell->state) {
case AFS_CELL_INACTIVE:
case AFS_CELL_FAILED:
- write_seqlock(&net->cells_lock);
- usage = 1;
- deleted = atomic_try_cmpxchg_relaxed(&cell->usage, &usage, 0);
- if (deleted)
+ down_write(&net->cells_lock);
+ active = 1;
+ if (atomic_try_cmpxchg_relaxed(&cell->active, &active, 0)) {
rb_erase(&cell->net_node, &net->cells);
- write_sequnlock(&net->cells_lock);
- if (deleted)
+ trace_afs_cell(cell->debug_id, atomic_read(&cell->ref), 0,
+ afs_cell_trace_unuse_delete);
+ smp_store_release(&cell->state, AFS_CELL_REMOVED);
+ }
+ up_write(&net->cells_lock);
+ if (cell->state == AFS_CELL_REMOVED) {
+ wake_up_var(&cell->state);
goto final_destruction;
+ }
if (cell->state == AFS_CELL_FAILED)
goto done;
smp_store_release(&cell->state, AFS_CELL_UNSET);
@@ -703,7 +778,7 @@ again:
goto again;
case AFS_CELL_ACTIVE:
- if (atomic_read(&cell->usage) > 1) {
+ if (atomic_read(&cell->active) > 1) {
if (test_and_clear_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags)) {
ret = afs_update_cell(cell);
if (ret < 0)
@@ -716,13 +791,16 @@ again:
goto again;
case AFS_CELL_DEACTIVATING:
- if (atomic_read(&cell->usage) > 1)
+ if (atomic_read(&cell->active) > 1)
goto reverse_deactivation;
afs_deactivate_cell(net, cell);
smp_store_release(&cell->state, AFS_CELL_INACTIVE);
wake_up_var(&cell->state);
goto again;
+ case AFS_CELL_REMOVED:
+ goto done;
+
default:
break;
}
@@ -748,9 +826,18 @@ done:
return;
final_destruction:
- call_rcu(&cell->rcu, afs_cell_destroy);
- afs_dec_cells_outstanding(net);
- _leave(" [destruct %d]", atomic_read(&net->cells_outstanding));
+ /* The root volume is pinning the cell */
+ afs_put_volume(cell->net, cell->root_volume, afs_volume_trace_put_cell_root);
+ cell->root_volume = NULL;
+ afs_put_cell(cell, afs_cell_trace_put_destroy);
+}
+
+static void afs_manage_cell_work(struct work_struct *work)
+{
+ struct afs_cell *cell = container_of(work, struct afs_cell, manager);
+
+ afs_manage_cell(cell);
+ afs_put_cell(cell, afs_cell_trace_put_queue_work);
}
/*
@@ -779,26 +866,29 @@ void afs_manage_cells(struct work_struct *work)
* lack of use and cells whose DNS results have expired and dispatch
* their managers.
*/
- read_seqlock_excl(&net->cells_lock);
+ down_read(&net->cells_lock);
for (cursor = rb_first(&net->cells); cursor; cursor = rb_next(cursor)) {
struct afs_cell *cell =
rb_entry(cursor, struct afs_cell, net_node);
- unsigned usage;
+ unsigned active;
bool sched_cell = false;
- usage = atomic_read(&cell->usage);
- _debug("manage %s %u", cell->name, usage);
+ active = atomic_read(&cell->active);
+ trace_afs_cell(cell->debug_id, atomic_read(&cell->ref),
+ active, afs_cell_trace_manage);
- ASSERTCMP(usage, >=, 1);
+ ASSERTCMP(active, >=, 1);
if (purging) {
- if (test_and_clear_bit(AFS_CELL_FL_NO_GC, &cell->flags))
- usage = atomic_dec_return(&cell->usage);
- ASSERTCMP(usage, ==, 1);
+ if (test_and_clear_bit(AFS_CELL_FL_NO_GC, &cell->flags)) {
+ active = atomic_dec_return(&cell->active);
+ trace_afs_cell(cell->debug_id, atomic_read(&cell->ref),
+ active, afs_cell_trace_unuse_pin);
+ }
}
- if (usage == 1) {
+ if (active == 1) {
struct afs_vlserver_list *vllist;
time64_t expire_at = cell->last_inactive;
@@ -821,10 +911,10 @@ void afs_manage_cells(struct work_struct *work)
}
if (sched_cell)
- queue_work(afs_wq, &cell->manager);
+ afs_queue_cell(cell, afs_cell_trace_get_queue_manage);
}
- read_sequnlock_excl(&net->cells_lock);
+ up_read(&net->cells_lock);
/* Update the timer on the way out. We have to pass an increment on
* cells_outstanding in the namespace that we are in to the timer or
@@ -854,11 +944,11 @@ void afs_cell_purge(struct afs_net *net)
_enter("");
- write_seqlock(&net->cells_lock);
- ws = rcu_access_pointer(net->ws_cell);
- RCU_INIT_POINTER(net->ws_cell, NULL);
- write_sequnlock(&net->cells_lock);
- afs_put_cell(net, ws);
+ down_write(&net->cells_lock);
+ ws = net->ws_cell;
+ net->ws_cell = NULL;
+ up_write(&net->cells_lock);
+ afs_unuse_cell(net, ws, afs_cell_trace_unuse_ws);
_debug("del timer");
if (del_timer_sync(&net->cells_timer))
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index 7b784af604fd..db832cc931c8 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -123,9 +123,9 @@ static int afs_probe_cell_name(struct dentry *dentry)
len--;
}
- cell = afs_lookup_cell_rcu(net, name, len);
+ cell = afs_find_cell(net, name, len, afs_cell_trace_use_probe);
if (!IS_ERR(cell)) {
- afs_put_cell(net, cell);
+ afs_unuse_cell(net, cell, afs_cell_trace_unuse_probe);
return 0;
}
@@ -179,7 +179,6 @@ static struct dentry *afs_lookup_atcell(struct dentry *dentry)
struct afs_cell *cell;
struct afs_net *net = afs_d2net(dentry);
struct dentry *ret;
- unsigned int seq = 0;
char *name;
int len;
@@ -191,17 +190,13 @@ static struct dentry *afs_lookup_atcell(struct dentry *dentry)
if (!name)
goto out_p;
- rcu_read_lock();
- do {
- read_seqbegin_or_lock(&net->cells_lock, &seq);
- cell = rcu_dereference_raw(net->ws_cell);
- if (cell) {
- len = cell->name_len;
- memcpy(name, cell->name, len + 1);
- }
- } while (need_seqretry(&net->cells_lock, seq));
- done_seqretry(&net->cells_lock, seq);
- rcu_read_unlock();
+ down_read(&net->cells_lock);
+ cell = net->ws_cell;
+ if (cell) {
+ len = cell->name_len;
+ memcpy(name, cell->name, len + 1);
+ }
+ up_read(&net->cells_lock);
ret = ERR_PTR(-ENOENT);
if (!cell)
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index e5f0446f27e5..81b0485fd22a 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -263,11 +263,11 @@ struct afs_net {
/* Cell database */
struct rb_root cells;
- struct afs_cell __rcu *ws_cell;
+ struct afs_cell *ws_cell;
struct work_struct cells_manager;
struct timer_list cells_timer;
atomic_t cells_outstanding;
- seqlock_t cells_lock;
+ struct rw_semaphore cells_lock;
struct mutex cells_alias_lock;
struct mutex proc_cells_lock;
@@ -326,6 +326,7 @@ enum afs_cell_state {
AFS_CELL_DEACTIVATING,
AFS_CELL_INACTIVE,
AFS_CELL_FAILED,
+ AFS_CELL_REMOVED,
};
/*
@@ -363,7 +364,8 @@ struct afs_cell {
#endif
time64_t dns_expiry; /* Time AFSDB/SRV record expires */
time64_t last_inactive; /* Time of last drop of usage count */
- atomic_t usage;
+ atomic_t ref; /* Struct refcount */
+ atomic_t active; /* Active usage counter */
unsigned long flags;
#define AFS_CELL_FL_NO_GC 0 /* The cell was added manually, don't auto-gc */
#define AFS_CELL_FL_DO_LOOKUP 1 /* DNS lookup requested */
@@ -373,6 +375,7 @@ struct afs_cell {
enum dns_record_source dns_source:8; /* Latest source of data from lookup */
enum dns_lookup_status dns_status:8; /* Latest status of data from lookup */
unsigned int dns_lookup_count; /* Counter of DNS lookups */
+ unsigned int debug_id;
/* The volumes belonging to this cell */
struct rb_root volumes; /* Tree of volumes on this server */
@@ -917,11 +920,16 @@ static inline bool afs_cb_is_broken(unsigned int cb_break,
* cell.c
*/
extern int afs_cell_init(struct afs_net *, const char *);
-extern struct afs_cell *afs_lookup_cell_rcu(struct afs_net *, const char *, unsigned);
+extern struct afs_cell *afs_find_cell(struct afs_net *, const char *, unsigned,
+ enum afs_cell_trace);
extern struct afs_cell *afs_lookup_cell(struct afs_net *, const char *, unsigned,
const char *, bool);
-extern struct afs_cell *afs_get_cell(struct afs_cell *);
-extern void afs_put_cell(struct afs_net *, struct afs_cell *);
+extern struct afs_cell *afs_use_cell(struct afs_cell *, enum afs_cell_trace);
+extern void afs_unuse_cell(struct afs_net *, struct afs_cell *, enum afs_cell_trace);
+extern struct afs_cell *afs_get_cell(struct afs_cell *, enum afs_cell_trace);
+extern void afs_see_cell(struct afs_cell *, enum afs_cell_trace);
+extern void afs_put_cell(struct afs_cell *, enum afs_cell_trace);
+extern void afs_queue_cell(struct afs_cell *, enum afs_cell_trace);
extern void afs_manage_cells(struct work_struct *);
extern void afs_cells_timer(struct timer_list *);
extern void __net_exit afs_cell_purge(struct afs_net *);
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 31b472f7c734..accdd8970e7c 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -78,7 +78,7 @@ static int __net_init afs_net_init(struct net *net_ns)
mutex_init(&net->socket_mutex);
net->cells = RB_ROOT;
- seqlock_init(&net->cells_lock);
+ init_rwsem(&net->cells_lock);
INIT_WORK(&net->cells_manager, afs_manage_cells);
timer_setup(&net->cells_timer, afs_cells_timer, 0);
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 79bc5f1338ed..052dab2f5c03 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -88,7 +88,7 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
ctx->force = true;
}
if (ctx->cell) {
- afs_put_cell(ctx->net, ctx->cell);
+ afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_mntpt);
ctx->cell = NULL;
}
if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) {
@@ -124,7 +124,7 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
char *buf;
if (src_as->cell)
- ctx->cell = afs_get_cell(src_as->cell);
+ ctx->cell = afs_use_cell(src_as->cell, afs_cell_trace_use_mntpt);
if (size < 2 || size > PAGE_SIZE - 1)
return -EINVAL;
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index e8babb62ed44..065a28bfa3f1 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -38,7 +38,7 @@ static int afs_proc_cells_show(struct seq_file *m, void *v)
if (v == SEQ_START_TOKEN) {
/* display header on line 1 */
- seq_puts(m, "USE TTL SV ST NAME\n");
+ seq_puts(m, "USE ACT TTL SV ST NAME\n");
return 0;
}
@@ -46,10 +46,11 @@ static int afs_proc_cells_show(struct seq_file *m, void *v)
vllist = rcu_dereference(cell->vl_servers);
/* display one cell per line on subsequent lines */
- seq_printf(m, "%3u %6lld %2u %2u %s\n",
- atomic_read(&cell->usage),
+ seq_printf(m, "%3u %3u %6lld %2u %2u %s\n",
+ atomic_read(&cell->ref),
+ atomic_read(&cell->active),
cell->dns_expiry - ktime_get_real_seconds(),
- vllist->nr_servers,
+ vllist ? vllist->nr_servers : 0,
cell->state,
cell->name);
return 0;
@@ -128,7 +129,7 @@ static int afs_proc_cells_write(struct file *file, char *buf, size_t size)
}
if (test_and_set_bit(AFS_CELL_FL_NO_GC, &cell->flags))
- afs_put_cell(net, cell);
+ afs_unuse_cell(net, cell, afs_cell_trace_unuse_no_pin);
} else {
goto inval;
}
@@ -154,13 +155,11 @@ static int afs_proc_rootcell_show(struct seq_file *m, void *v)
struct afs_net *net;
net = afs_seq2net_single(m);
- if (rcu_access_pointer(net->ws_cell)) {
- rcu_read_lock();
- cell = rcu_dereference(net->ws_cell);
- if (cell)
- seq_printf(m, "%s\n", cell->name);
- rcu_read_unlock();
- }
+ down_read(&net->cells_lock);
+ cell = net->ws_cell;
+ if (cell)
+ seq_printf(m, "%s\n", cell->name);
+ up_read(&net->cells_lock);
return 0;
}
diff --git a/fs/afs/server.c b/fs/afs/server.c
index e82e452e2612..684a2b02b9ff 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -550,7 +550,12 @@ void afs_manage_servers(struct work_struct *work)
_debug("manage %pU %u", &server->uuid, active);
- ASSERTIFCMP(purging, active, ==, 0);
+ if (purging) {
+ trace_afs_server(server, atomic_read(&server->ref),
+ active, afs_server_trace_purging);
+ if (active != 0)
+ pr_notice("Can't purge s=%08x\n", server->debug_id);
+ }
if (active == 0) {
time64_t expire_at = server->unuse_time;
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 3a40ee752c1e..6c5900df6aa5 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -294,7 +294,8 @@ static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param)
cellnamesz, cellnamesz, cellname ?: "");
return PTR_ERR(cell);
}
- afs_put_cell(ctx->net, ctx->cell);
+ afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_parse);
+ afs_see_cell(cell, afs_cell_trace_see_source);
ctx->cell = cell;
}
@@ -389,8 +390,9 @@ static int afs_validate_fc(struct fs_context *fc)
_debug("switch to alias");
key_put(ctx->key);
ctx->key = NULL;
- cell = afs_get_cell(ctx->cell->alias_of);
- afs_put_cell(ctx->net, ctx->cell);
+ cell = afs_use_cell(ctx->cell->alias_of,
+ afs_cell_trace_use_fc_alias);
+ afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_fc);
ctx->cell = cell;
goto reget_key;
}
@@ -507,7 +509,7 @@ static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc)
if (ctx->dyn_root) {
as->dyn_root = true;
} else {
- as->cell = afs_get_cell(ctx->cell);
+ as->cell = afs_use_cell(ctx->cell, afs_cell_trace_use_sbi);
as->volume = afs_get_volume(ctx->volume,
afs_volume_trace_get_alloc_sbi);
}
@@ -520,7 +522,7 @@ static void afs_destroy_sbi(struct afs_super_info *as)
if (as) {
struct afs_net *net = afs_net(as->net_ns);
afs_put_volume(net, as->volume, afs_volume_trace_put_destroy_sbi);
- afs_put_cell(net, as->cell);
+ afs_unuse_cell(net, as->cell, afs_cell_trace_unuse_sbi);
put_net(as->net_ns);
kfree(as);
}
@@ -606,7 +608,7 @@ static void afs_free_fc(struct fs_context *fc)
afs_destroy_sbi(fc->s_fs_info);
afs_put_volume(ctx->net, ctx->volume, afs_volume_trace_put_free_fc);
- afs_put_cell(ctx->net, ctx->cell);
+ afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_fc);
key_put(ctx->key);
kfree(ctx);
}
@@ -633,9 +635,7 @@ static int afs_init_fs_context(struct fs_context *fc)
ctx->net = afs_net(fc->net_ns);
/* Default to the workstation cell. */
- rcu_read_lock();
- cell = afs_lookup_cell_rcu(ctx->net, NULL, 0);
- rcu_read_unlock();
+ cell = afs_find_cell(ctx->net, NULL, 0, afs_cell_trace_use_fc);
if (IS_ERR(cell))
cell = NULL;
ctx->cell = cell;
diff --git a/fs/afs/vl_alias.c b/fs/afs/vl_alias.c
index 5082ef04e99c..f04a80e4f5c3 100644
--- a/fs/afs/vl_alias.c
+++ b/fs/afs/vl_alias.c
@@ -177,7 +177,7 @@ static int afs_compare_cell_roots(struct afs_cell *cell)
is_alias:
rcu_read_unlock();
- cell->alias_of = afs_get_cell(p);
+ cell->alias_of = afs_use_cell(p, afs_cell_trace_use_alias);
return 1;
}
@@ -247,18 +247,18 @@ static int afs_query_for_alias(struct afs_cell *cell, struct key *key)
continue;
if (p->root_volume)
continue; /* Ignore cells that have a root.cell volume. */
- afs_get_cell(p);
+ afs_use_cell(p, afs_cell_trace_use_check_alias);
mutex_unlock(&cell->net->proc_cells_lock);
if (afs_query_for_alias_one(cell, key, p) != 0)
goto is_alias;
if (mutex_lock_interruptible(&cell->net->proc_cells_lock) < 0) {
- afs_put_cell(cell->net, p);
+ afs_unuse_cell(cell->net, p, afs_cell_trace_unuse_check_alias);
return -ERESTARTSYS;
}
- afs_put_cell(cell->net, p);
+ afs_unuse_cell(cell->net, p, afs_cell_trace_unuse_check_alias);
}
mutex_unlock(&cell->net->proc_cells_lock);
diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c
index c0458c903b31..488e58490b16 100644
--- a/fs/afs/vl_rotate.c
+++ b/fs/afs/vl_rotate.c
@@ -45,7 +45,7 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
cell->dns_expiry <= ktime_get_real_seconds()) {
dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count);
set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags);
- queue_work(afs_wq, &cell->manager);
+ afs_queue_cell(cell, afs_cell_trace_get_queue_dns);
if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
if (wait_var_event_interruptible(
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 9bc0509e3634..f84194b791d3 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -83,7 +83,7 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params,
volume->vid = vldb->vid[params->type];
volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
- volume->cell = afs_get_cell(params->cell);
+ volume->cell = afs_get_cell(params->cell, afs_cell_trace_get_vol);
volume->type = params->type;
volume->type_force = params->force;
volume->name_len = vldb->name_len;
@@ -106,7 +106,7 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params,
return volume;
error_1:
- afs_put_cell(params->net, volume->cell);
+ afs_put_cell(volume->cell, afs_cell_trace_put_vol);
kfree(volume);
error_0:
return ERR_PTR(ret);
@@ -228,7 +228,7 @@ static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume)
afs_remove_volume_from_cell(volume);
afs_put_serverlist(net, rcu_access_pointer(volume->servers));
- afs_put_cell(net, volume->cell);
+ afs_put_cell(volume->cell, afs_cell_trace_put_vol);
trace_afs_volume(volume->vid, atomic_read(&volume->usage),
afs_volume_trace_free);
kfree_rcu(volume, rcu);
diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c
index 75105f45c51a..322b7dfb4ea0 100644
--- a/fs/autofs/dev-ioctl.c
+++ b/fs/autofs/dev-ioctl.c
@@ -8,6 +8,7 @@
#include <linux/compat.h>
#include <linux/syscalls.h>
#include <linux/magic.h>
+#include <linux/nospec.h>
#include "autofs_i.h"
@@ -563,7 +564,7 @@ out:
static ioctl_fn lookup_dev_ioctl(unsigned int cmd)
{
- static ioctl_fn _ioctls[] = {
+ static const ioctl_fn _ioctls[] = {
autofs_dev_ioctl_version,
autofs_dev_ioctl_protover,
autofs_dev_ioctl_protosubver,
@@ -581,7 +582,10 @@ static ioctl_fn lookup_dev_ioctl(unsigned int cmd)
};
unsigned int idx = cmd_idx(cmd);
- return (idx >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[idx];
+ if (idx >= ARRAY_SIZE(_ioctls))
+ return NULL;
+ idx = array_index_nospec(idx, ARRAY_SIZE(_ioctls));
+ return _ioctls[idx];
}
/* ioctl dispatcher */
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 13d053982dd7..b6b3d052ca86 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
+#include <linux/log2.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
@@ -309,7 +310,10 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
* Grow the stack manually; some architectures have a limit on how
* far ahead a user-space access may be in order to grow the stack.
*/
+ if (mmap_read_lock_killable(mm))
+ return -EINTR;
vma = find_extend_vma(mm, bprm->p);
+ mmap_read_unlock(mm);
if (!vma)
return -EFAULT;
@@ -421,6 +425,26 @@ static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
return 0;
}
+static unsigned long maximum_alignment(struct elf_phdr *cmds, int nr)
+{
+ unsigned long alignment = 0;
+ int i;
+
+ for (i = 0; i < nr; i++) {
+ if (cmds[i].p_type == PT_LOAD) {
+ unsigned long p_align = cmds[i].p_align;
+
+ /* skip non-power of two alignments as invalid */
+ if (!is_power_of_2(p_align))
+ continue;
+ alignment = max(alignment, p_align);
+ }
+ }
+
+ /* ensure we align to at least one page */
+ return ELF_PAGEALIGN(alignment);
+}
+
/**
* load_elf_phdrs() - load ELF program headers
* @elf_ex: ELF header of the binary whose program headers should be loaded
@@ -1008,6 +1032,7 @@ out_free_interp:
int elf_prot, elf_flags;
unsigned long k, vaddr;
unsigned long total_size = 0;
+ unsigned long alignment;
if (elf_ppnt->p_type != PT_LOAD)
continue;
@@ -1086,6 +1111,9 @@ out_free_interp:
load_bias = ELF_ET_DYN_BASE;
if (current->flags & PF_RANDOMIZE)
load_bias += arch_mmap_rnd();
+ alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
+ if (alignment)
+ load_bias &= ~(alignment - 1);
elf_flags |= MAP_FIXED;
} else
load_bias = 0;
@@ -1389,126 +1417,6 @@ out:
* Jeremy Fitzhardinge <jeremy@sw.oz.au>
*/
-/*
- * The purpose of always_dump_vma() is to make sure that special kernel mappings
- * that are useful for post-mortem analysis are included in every core dump.
- * In that way we ensure that the core dump is fully interpretable later
- * without matching up the same kernel and hardware config to see what PC values
- * meant. These special mappings include - vDSO, vsyscall, and other
- * architecture specific mappings
- */
-static bool always_dump_vma(struct vm_area_struct *vma)
-{
- /* Any vsyscall mappings? */
- if (vma == get_gate_vma(vma->vm_mm))
- return true;
-
- /*
- * Assume that all vmas with a .name op should always be dumped.
- * If this changes, a new vm_ops field can easily be added.
- */
- if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
- return true;
-
- /*
- * arch_vma_name() returns non-NULL for special architecture mappings,
- * such as vDSO sections.
- */
- if (arch_vma_name(vma))
- return true;
-
- return false;
-}
-
-/*
- * Decide what to dump of a segment, part, all or none.
- */
-static unsigned long vma_dump_size(struct vm_area_struct *vma,
- unsigned long mm_flags)
-{
-#define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
-
- /* always dump the vdso and vsyscall sections */
- if (always_dump_vma(vma))
- goto whole;
-
- if (vma->vm_flags & VM_DONTDUMP)
- return 0;
-
- /* support for DAX */
- if (vma_is_dax(vma)) {
- if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
- goto whole;
- if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
- goto whole;
- return 0;
- }
-
- /* Hugetlb memory check */
- if (is_vm_hugetlb_page(vma)) {
- if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
- goto whole;
- if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
- goto whole;
- return 0;
- }
-
- /* Do not dump I/O mapped devices or special mappings */
- if (vma->vm_flags & VM_IO)
- return 0;
-
- /* By default, dump shared memory if mapped from an anonymous file. */
- if (vma->vm_flags & VM_SHARED) {
- if (file_inode(vma->vm_file)->i_nlink == 0 ?
- FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
- goto whole;
- return 0;
- }
-
- /* Dump segments that have been written to. */
- if (vma->anon_vma && FILTER(ANON_PRIVATE))
- goto whole;
- if (vma->vm_file == NULL)
- return 0;
-
- if (FILTER(MAPPED_PRIVATE))
- goto whole;
-
- /*
- * If this looks like the beginning of a DSO or executable mapping,
- * check for an ELF header. If we find one, dump the first page to
- * aid in determining what was mapped here.
- */
- if (FILTER(ELF_HEADERS) &&
- vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
- u32 __user *header = (u32 __user *) vma->vm_start;
- u32 word;
- /*
- * Doing it this way gets the constant folded by GCC.
- */
- union {
- u32 cmp;
- char elfmag[SELFMAG];
- } magic;
- BUILD_BUG_ON(SELFMAG != sizeof word);
- magic.elfmag[EI_MAG0] = ELFMAG0;
- magic.elfmag[EI_MAG1] = ELFMAG1;
- magic.elfmag[EI_MAG2] = ELFMAG2;
- magic.elfmag[EI_MAG3] = ELFMAG3;
- if (unlikely(get_user(word, header)))
- word = 0;
- if (word == magic.cmp)
- return PAGE_SIZE;
- }
-
-#undef FILTER
-
- return 0;
-
-whole:
- return vma->vm_end - vma->vm_start;
-}
-
/* An ELF note in memory */
struct memelfnote
{
@@ -2220,32 +2128,6 @@ static void free_note_info(struct elf_note_info *info)
#endif
-static struct vm_area_struct *first_vma(struct task_struct *tsk,
- struct vm_area_struct *gate_vma)
-{
- struct vm_area_struct *ret = tsk->mm->mmap;
-
- if (ret)
- return ret;
- return gate_vma;
-}
-/*
- * Helper function for iterating across a vma list. It ensures that the caller
- * will visit `gate_vma' prior to terminating the search.
- */
-static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
- struct vm_area_struct *gate_vma)
-{
- struct vm_area_struct *ret;
-
- ret = this_vma->vm_next;
- if (ret)
- return ret;
- if (this_vma == gate_vma)
- return NULL;
- return gate_vma;
-}
-
static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
elf_addr_t e_shoff, int segs)
{
@@ -2272,9 +2154,8 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
static int elf_core_dump(struct coredump_params *cprm)
{
int has_dumped = 0;
- int segs, i;
- size_t vma_data_size = 0;
- struct vm_area_struct *vma, *gate_vma;
+ int vma_count, segs, i;
+ size_t vma_data_size;
struct elfhdr elf;
loff_t offset = 0, dataoff;
struct elf_note_info info = { };
@@ -2282,30 +2163,16 @@ static int elf_core_dump(struct coredump_params *cprm)
struct elf_shdr *shdr4extnum = NULL;
Elf_Half e_phnum;
elf_addr_t e_shoff;
- elf_addr_t *vma_filesz = NULL;
+ struct core_vma_metadata *vma_meta;
+
+ if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size))
+ return 0;
- /*
- * We no longer stop all VM operations.
- *
- * This is because those proceses that could possibly change map_count
- * or the mmap / vma pages are now blocked in do_exit on current
- * finishing this core dump.
- *
- * Only ptrace can touch these memory addresses, but it doesn't change
- * the map_count or the pages allocated. So no possibility of crashing
- * exists while dumping the mm->vm_next areas to the core file.
- */
-
/*
* The number of segs are recored into ELF header as 16bit value.
* Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
*/
- segs = current->mm->map_count;
- segs += elf_core_extra_phdrs();
-
- gate_vma = get_gate_vma(current->mm);
- if (gate_vma != NULL)
- segs++;
+ segs = vma_count + elf_core_extra_phdrs();
/* for notes section */
segs++;
@@ -2343,24 +2210,6 @@ static int elf_core_dump(struct coredump_params *cprm)
dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
- /*
- * Zero vma process will get ZERO_SIZE_PTR here.
- * Let coredump continue for register state at least.
- */
- vma_filesz = kvmalloc(array_size(sizeof(*vma_filesz), (segs - 1)),
- GFP_KERNEL);
- if (!vma_filesz)
- goto end_coredump;
-
- for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
- vma = next_vma(vma, gate_vma)) {
- unsigned long dump_size;
-
- dump_size = vma_dump_size(vma, cprm->mm_flags);
- vma_filesz[i++] = dump_size;
- vma_data_size += dump_size;
- }
-
offset += vma_data_size;
offset += elf_core_extra_data_size();
e_shoff = offset;
@@ -2381,21 +2230,23 @@ static int elf_core_dump(struct coredump_params *cprm)
goto end_coredump;
/* Write program headers for segments dump */
- for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
- vma = next_vma(vma, gate_vma)) {
+ for (i = 0; i < vma_count; i++) {
+ struct core_vma_metadata *meta = vma_meta + i;
struct elf_phdr phdr;
phdr.p_type = PT_LOAD;
phdr.p_offset = offset;
- phdr.p_vaddr = vma->vm_start;
+ phdr.p_vaddr = meta->start;
phdr.p_paddr = 0;
- phdr.p_filesz = vma_filesz[i++];
- phdr.p_memsz = vma->vm_end - vma->vm_start;
+ phdr.p_filesz = meta->dump_size;
+ phdr.p_memsz = meta->end - meta->start;
offset += phdr.p_filesz;
- phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
- if (vma->vm_flags & VM_WRITE)
+ phdr.p_flags = 0;
+ if (meta->flags & VM_READ)
+ phdr.p_flags |= PF_R;
+ if (meta->flags & VM_WRITE)
phdr.p_flags |= PF_W;
- if (vma->vm_flags & VM_EXEC)
+ if (meta->flags & VM_EXEC)
phdr.p_flags |= PF_X;
phdr.p_align = ELF_EXEC_PAGESIZE;
@@ -2417,28 +2268,11 @@ static int elf_core_dump(struct coredump_params *cprm)
if (!dump_skip(cprm, dataoff - cprm->pos))
goto end_coredump;
- for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
- vma = next_vma(vma, gate_vma)) {
- unsigned long addr;
- unsigned long end;
+ for (i = 0; i < vma_count; i++) {
+ struct core_vma_metadata *meta = vma_meta + i;
- end = vma->vm_start + vma_filesz[i++];
-
- for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
- struct page *page;
- int stop;
-
- page = get_dump_page(addr);
- if (page) {
- void *kaddr = kmap(page);
- stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
- kunmap(page);
- put_page(page);
- } else
- stop = !dump_skip(cprm, PAGE_SIZE);
- if (stop)
- goto end_coredump;
- }
+ if (!dump_user_range(cprm, meta->start, meta->dump_size))
+ goto end_coredump;
}
dump_truncate(cprm);
@@ -2453,7 +2287,7 @@ static int elf_core_dump(struct coredump_params *cprm)
end_coredump:
free_note_info(&info);
kfree(shdr4extnum);
- kvfree(vma_filesz);
+ kvfree(vma_meta);
kfree(phdr4note);
return has_dumped;
}
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 50f845702b92..be4062b8ba75 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1215,76 +1215,6 @@ struct elf_prstatus_fdpic
int pr_fpvalid; /* True if math co-processor being used. */
};
-/*
- * Decide whether a segment is worth dumping; default is yes to be
- * sure (missing info is worse than too much; etc).
- * Personally I'd include everything, and use the coredump limit...
- *
- * I think we should skip something. But I am not sure how. H.J.
- */
-static int maydump(struct vm_area_struct *vma, unsigned long mm_flags)
-{
- int dump_ok;
-
- /* Do not dump I/O mapped devices or special mappings */
- if (vma->vm_flags & VM_IO) {
- kdcore("%08lx: %08lx: no (IO)", vma->vm_start, vma->vm_flags);
- return 0;
- }
-
- /* If we may not read the contents, don't allow us to dump
- * them either. "dump_write()" can't handle it anyway.
- */
- if (!(vma->vm_flags & VM_READ)) {
- kdcore("%08lx: %08lx: no (!read)", vma->vm_start, vma->vm_flags);
- return 0;
- }
-
- /* support for DAX */
- if (vma_is_dax(vma)) {
- if (vma->vm_flags & VM_SHARED) {
- dump_ok = test_bit(MMF_DUMP_DAX_SHARED, &mm_flags);
- kdcore("%08lx: %08lx: %s (DAX shared)", vma->vm_start,
- vma->vm_flags, dump_ok ? "yes" : "no");
- } else {
- dump_ok = test_bit(MMF_DUMP_DAX_PRIVATE, &mm_flags);
- kdcore("%08lx: %08lx: %s (DAX private)", vma->vm_start,
- vma->vm_flags, dump_ok ? "yes" : "no");
- }
- return dump_ok;
- }
-
- /* By default, dump shared memory if mapped from an anonymous file. */
- if (vma->vm_flags & VM_SHARED) {
- if (file_inode(vma->vm_file)->i_nlink == 0) {
- dump_ok = test_bit(MMF_DUMP_ANON_SHARED, &mm_flags);
- kdcore("%08lx: %08lx: %s (share)", vma->vm_start,
- vma->vm_flags, dump_ok ? "yes" : "no");
- return dump_ok;
- }
-
- dump_ok = test_bit(MMF_DUMP_MAPPED_SHARED, &mm_flags);
- kdcore("%08lx: %08lx: %s (share)", vma->vm_start,
- vma->vm_flags, dump_ok ? "yes" : "no");
- return dump_ok;
- }
-
-#ifdef CONFIG_MMU
- /* By default, if it hasn't been written to, don't write it out */
- if (!vma->anon_vma) {
- dump_ok = test_bit(MMF_DUMP_MAPPED_PRIVATE, &mm_flags);
- kdcore("%08lx: %08lx: %s (!anon)", vma->vm_start,
- vma->vm_flags, dump_ok ? "yes" : "no");
- return dump_ok;
- }
-#endif
-
- dump_ok = test_bit(MMF_DUMP_ANON_PRIVATE, &mm_flags);
- kdcore("%08lx: %08lx: %s", vma->vm_start, vma->vm_flags,
- dump_ok ? "yes" : "no");
- return dump_ok;
-}
-
/* An ELF note in memory */
struct memelfnote
{
@@ -1524,54 +1454,21 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
/*
* dump the segments for an MMU process
*/
-static bool elf_fdpic_dump_segments(struct coredump_params *cprm)
+static bool elf_fdpic_dump_segments(struct coredump_params *cprm,
+ struct core_vma_metadata *vma_meta,
+ int vma_count)
{
- struct vm_area_struct *vma;
+ int i;
- for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
-#ifdef CONFIG_MMU
- unsigned long addr;
-#endif
+ for (i = 0; i < vma_count; i++) {
+ struct core_vma_metadata *meta = vma_meta + i;
- if (!maydump(vma, cprm->mm_flags))
- continue;
-
-#ifdef CONFIG_MMU
- for (addr = vma->vm_start; addr < vma->vm_end;
- addr += PAGE_SIZE) {
- bool res;
- struct page *page = get_dump_page(addr);
- if (page) {
- void *kaddr = kmap(page);
- res = dump_emit(cprm, kaddr, PAGE_SIZE);
- kunmap(page);
- put_page(page);
- } else {
- res = dump_skip(cprm, PAGE_SIZE);
- }
- if (!res)
- return false;
- }
-#else
- if (!dump_emit(cprm, (void *) vma->vm_start,
- vma->vm_end - vma->vm_start))
+ if (!dump_user_range(cprm, meta->start, meta->dump_size))
return false;
-#endif
}
return true;
}
-static size_t elf_core_vma_data_size(unsigned long mm_flags)
-{
- struct vm_area_struct *vma;
- size_t size = 0;
-
- for (vma = current->mm->mmap; vma; vma = vma->vm_next)
- if (maydump(vma, mm_flags))
- size += vma->vm_end - vma->vm_start;
- return size;
-}
-
/*
* Actual dumper
*
@@ -1582,9 +1479,8 @@ static size_t elf_core_vma_data_size(unsigned long mm_flags)
static int elf_fdpic_core_dump(struct coredump_params *cprm)
{
int has_dumped = 0;
- int segs;
+ int vma_count, segs;
int i;
- struct vm_area_struct *vma;
struct elfhdr *elf = NULL;
loff_t offset = 0, dataoff;
struct memelfnote psinfo_note, auxv_note;
@@ -1598,18 +1494,8 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
elf_addr_t e_shoff;
struct core_thread *ct;
struct elf_thread_status *tmp;
-
- /*
- * We no longer stop all VM operations.
- *
- * This is because those proceses that could possibly change map_count
- * or the mmap / vma pages are now blocked in do_exit on current
- * finishing this core dump.
- *
- * Only ptrace can touch these memory addresses, but it doesn't change
- * the map_count or the pages allocated. So no possibility of crashing
- * exists while dumping the mm->vm_next areas to the core file.
- */
+ struct core_vma_metadata *vma_meta = NULL;
+ size_t vma_data_size;
/* alloc memory for large data structures: too large to be on stack */
elf = kmalloc(sizeof(*elf), GFP_KERNEL);
@@ -1619,6 +1505,9 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
if (!psinfo)
goto end_coredump;
+ if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size))
+ goto end_coredump;
+
for (ct = current->mm->core_state->dumper.next;
ct; ct = ct->next) {
tmp = elf_dump_thread_status(cprm->siginfo->si_signo,
@@ -1638,8 +1527,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
tmp->next = thread_list;
thread_list = tmp;
- segs = current->mm->map_count;
- segs += elf_core_extra_phdrs();
+ segs = vma_count + elf_core_extra_phdrs();
/* for notes section */
segs++;
@@ -1684,7 +1572,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
/* Page-align dumped data */
dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
- offset += elf_core_vma_data_size(cprm->mm_flags);
+ offset += vma_data_size;
offset += elf_core_extra_data_size();
e_shoff = offset;
@@ -1704,23 +1592,26 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
goto end_coredump;
/* write program headers for segments dump */
- for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
+ for (i = 0; i < vma_count; i++) {
+ struct core_vma_metadata *meta = vma_meta + i;
struct elf_phdr phdr;
size_t sz;
- sz = vma->vm_end - vma->vm_start;
+ sz = meta->end - meta->start;
phdr.p_type = PT_LOAD;
phdr.p_offset = offset;
- phdr.p_vaddr = vma->vm_start;
+ phdr.p_vaddr = meta->start;
phdr.p_paddr = 0;
- phdr.p_filesz = maydump(vma, cprm->mm_flags) ? sz : 0;
+ phdr.p_filesz = meta->dump_size;
phdr.p_memsz = sz;
offset += phdr.p_filesz;
- phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
- if (vma->vm_flags & VM_WRITE)
+ phdr.p_flags = 0;
+ if (meta->flags & VM_READ)
+ phdr.p_flags |= PF_R;
+ if (meta->flags & VM_WRITE)
phdr.p_flags |= PF_W;
- if (vma->vm_flags & VM_EXEC)
+ if (meta->flags & VM_EXEC)
phdr.p_flags |= PF_X;
phdr.p_align = ELF_EXEC_PAGESIZE;
@@ -1752,7 +1643,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
if (!dump_skip(cprm, dataoff - cprm->pos))
goto end_coredump;
- if (!elf_fdpic_dump_segments(cprm))
+ if (!elf_fdpic_dump_segments(cprm, vma_meta, vma_count))
goto end_coredump;
if (!elf_core_write_extra_data(cprm))
@@ -1776,6 +1667,7 @@ end_coredump:
thread_list = thread_list->next;
kfree(tmp);
}
+ kvfree(vma_meta);
kfree(phdr4note);
kfree(elf);
kfree(psinfo);
diff --git a/fs/buffer.c b/fs/buffer.c
index 5a28a6aa7f16..23f645657488 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -842,13 +842,13 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
struct buffer_head *bh, *head;
gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
long offset;
- struct mem_cgroup *memcg;
+ struct mem_cgroup *memcg, *old_memcg;
if (retry)
gfp |= __GFP_NOFAIL;
memcg = get_mem_cgroup_from_page(page);
- memalloc_use_memcg(memcg);
+ old_memcg = set_active_memcg(memcg);
head = NULL;
offset = PAGE_SIZE;
@@ -867,7 +867,7 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
set_bh_page(bh, page, offset);
}
out:
- memalloc_unuse_memcg();
+ set_active_memcg(old_memcg);
mem_cgroup_put(memcg);
return head;
/*
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index ca2273727225..b0983e2a4e2c 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1168,7 +1168,7 @@ EXPORT_SYMBOL(configfs_depend_item);
/*
* Release the dependent linkage. This is much simpler than
- * configfs_depend_item() because we know that that the client driver is
+ * configfs_depend_item() because we know that the client driver is
* pinned, thus the subsystem is pinned, and therefore configfs is pinned.
*/
void configfs_undepend_item(struct config_item *target)
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index fb65b706cc0d..1f0270229d7b 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -267,7 +267,7 @@ flush_write_buffer(struct file *file, struct configfs_buffer *buffer, size_t cou
* There is no easy way for us to know if userspace is only doing a partial
* write, so we don't support them. We expect the entire buffer to come
* on the first write.
- * Hint: if you're writing a value, first read the file, modify only the
+ * Hint: if you're writing a value, first read the file, modify only
* the value you're changing, then write entire buffer back.
*/
diff --git a/fs/coredump.c b/fs/coredump.c
index 76e7c10edfc0..0cd9056d79cc 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -840,17 +840,17 @@ int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
ssize_t n;
if (cprm->written + nr > cprm->limit)
return 0;
- while (nr) {
- if (dump_interrupted())
- return 0;
- n = __kernel_write(file, addr, nr, &pos);
- if (n <= 0)
- return 0;
- file->f_pos = pos;
- cprm->written += n;
- cprm->pos += n;
- nr -= n;
- }
+
+
+ if (dump_interrupted())
+ return 0;
+ n = __kernel_write(file, addr, nr, &pos);
+ if (n != nr)
+ return 0;
+ file->f_pos = pos;
+ cprm->written += n;
+ cprm->pos += n;
+
return 1;
}
EXPORT_SYMBOL(dump_emit);
@@ -876,6 +876,40 @@ int dump_skip(struct coredump_params *cprm, size_t nr)
}
EXPORT_SYMBOL(dump_skip);
+#ifdef CONFIG_ELF_CORE
+int dump_user_range(struct coredump_params *cprm, unsigned long start,
+ unsigned long len)
+{
+ unsigned long addr;
+
+ for (addr = start; addr < start + len; addr += PAGE_SIZE) {
+ struct page *page;
+ int stop;
+
+ /*
+ * To avoid having to allocate page tables for virtual address
+ * ranges that have never been used yet, and also to make it
+ * easy to generate sparse core files, use a helper that returns
+ * NULL when encountering an empty page table entry that would
+ * otherwise have been filled with the zero page.
+ */
+ page = get_dump_page(addr);
+ if (page) {
+ void *kaddr = kmap(page);
+
+ stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
+ kunmap(page);
+ put_page(page);
+ } else {
+ stop = !dump_skip(cprm, PAGE_SIZE);
+ }
+ if (stop)
+ return 0;
+ }
+ return 1;
+}
+#endif
+
int dump_align(struct coredump_params *cprm, int align)
{
unsigned mod = cprm->pos & (align - 1);
@@ -902,3 +936,183 @@ void dump_truncate(struct coredump_params *cprm)
}
}
EXPORT_SYMBOL(dump_truncate);
+
+/*
+ * The purpose of always_dump_vma() is to make sure that special kernel mappings
+ * that are useful for post-mortem analysis are included in every core dump.
+ * In that way we ensure that the core dump is fully interpretable later
+ * without matching up the same kernel and hardware config to see what PC values
+ * meant. These special mappings include - vDSO, vsyscall, and other
+ * architecture specific mappings
+ */
+static bool always_dump_vma(struct vm_area_struct *vma)
+{
+ /* Any vsyscall mappings? */
+ if (vma == get_gate_vma(vma->vm_mm))
+ return true;
+
+ /*
+ * Assume that all vmas with a .name op should always be dumped.
+ * If this changes, a new vm_ops field can easily be added.
+ */
+ if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
+ return true;
+
+ /*
+ * arch_vma_name() returns non-NULL for special architecture mappings,
+ * such as vDSO sections.
+ */
+ if (arch_vma_name(vma))
+ return true;
+
+ return false;
+}
+
+/*
+ * Decide how much of @vma's contents should be included in a core dump.
+ */
+static unsigned long vma_dump_size(struct vm_area_struct *vma,
+ unsigned long mm_flags)
+{
+#define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
+
+ /* always dump the vdso and vsyscall sections */
+ if (always_dump_vma(vma))
+ goto whole;
+
+ if (vma->vm_flags & VM_DONTDUMP)
+ return 0;
+
+ /* support for DAX */
+ if (vma_is_dax(vma)) {
+ if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
+ goto whole;
+ if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
+ goto whole;
+ return 0;
+ }
+
+ /* Hugetlb memory check */
+ if (is_vm_hugetlb_page(vma)) {
+ if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
+ goto whole;
+ if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
+ goto whole;
+ return 0;
+ }
+
+ /* Do not dump I/O mapped devices or special mappings */
+ if (vma->vm_flags & VM_IO)
+ return 0;
+
+ /* By default, dump shared memory if mapped from an anonymous file. */
+ if (vma->vm_flags & VM_SHARED) {
+ if (file_inode(vma->vm_file)->i_nlink == 0 ?
+ FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
+ goto whole;
+ return 0;
+ }
+
+ /* Dump segments that have been written to. */
+ if ((!IS_ENABLED(CONFIG_MMU) || vma->anon_vma) && FILTER(ANON_PRIVATE))
+ goto whole;
+ if (vma->vm_file == NULL)
+ return 0;
+
+ if (FILTER(MAPPED_PRIVATE))
+ goto whole;
+
+ /*
+ * If this is the beginning of an executable file mapping,
+ * dump the first page to aid in determining what was mapped here.
+ */
+ if (FILTER(ELF_HEADERS) &&
+ vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ) &&
+ (READ_ONCE(file_inode(vma->vm_file)->i_mode) & 0111) != 0)
+ return PAGE_SIZE;
+
+#undef FILTER
+
+ return 0;
+
+whole:
+ return vma->vm_end - vma->vm_start;
+}
+
+static struct vm_area_struct *first_vma(struct task_struct *tsk,
+ struct vm_area_struct *gate_vma)
+{
+ struct vm_area_struct *ret = tsk->mm->mmap;
+
+ if (ret)
+ return ret;
+ return gate_vma;
+}
+
+/*
+ * Helper function for iterating across a vma list. It ensures that the caller
+ * will visit `gate_vma' prior to terminating the search.
+ */
+static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
+ struct vm_area_struct *gate_vma)
+{
+ struct vm_area_struct *ret;
+
+ ret = this_vma->vm_next;
+ if (ret)
+ return ret;
+ if (this_vma == gate_vma)
+ return NULL;
+ return gate_vma;
+}
+
+/*
+ * Under the mmap_lock, take a snapshot of relevant information about the task's
+ * VMAs.
+ */
+int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
+ struct core_vma_metadata **vma_meta,
+ size_t *vma_data_size_ptr)
+{
+ struct vm_area_struct *vma, *gate_vma;
+ struct mm_struct *mm = current->mm;
+ int i;
+ size_t vma_data_size = 0;
+
+ /*
+ * Once the stack expansion code is fixed to not change VMA bounds
+ * under mmap_lock in read mode, this can be changed to take the
+ * mmap_lock in read mode.
+ */
+ if (mmap_write_lock_killable(mm))
+ return -EINTR;
+
+ gate_vma = get_gate_vma(mm);
+ *vma_count = mm->map_count + (gate_vma ? 1 : 0);
+
+ *vma_meta = kvmalloc_array(*vma_count, sizeof(**vma_meta), GFP_KERNEL);
+ if (!*vma_meta) {
+ mmap_write_unlock(mm);
+ return -ENOMEM;
+ }
+
+ for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
+ vma = next_vma(vma, gate_vma), i++) {
+ struct core_vma_metadata *m = (*vma_meta) + i;
+
+ m->start = vma->vm_start;
+ m->end = vma->vm_end;
+ m->flags = vma->vm_flags;
+ m->dump_size = vma_dump_size(vma, cprm->mm_flags);
+
+ vma_data_size += m->dump_size;
+ }
+
+ mmap_write_unlock(mm);
+
+ if (WARN_ON(i != *vma_count))
+ return -EFAULT;
+
+ *vma_data_size_ptr = vma_data_size;
+ return 0;
+}
diff --git a/fs/dax.c b/fs/dax.c
index 6ad346352a8c..5b47834f2e1b 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -559,8 +559,11 @@ fallback:
}
/**
- * dax_layout_busy_page - find first pinned page in @mapping
+ * dax_layout_busy_page_range - find first pinned page in @mapping
* @mapping: address space to scan for a page with ref count > 1
+ * @start: Starting offset. Page containing 'start' is included.
+ * @end: End offset. Page containing 'end' is included. If 'end' is LLONG_MAX,
+ * pages from 'start' till the end of file are included.
*
* DAX requires ZONE_DEVICE mapped pages. These pages are never
* 'onlined' to the page allocator so they are considered idle when
@@ -573,12 +576,15 @@ fallback:
* to be able to run unmap_mapping_range() and subsequently not race
* mapping_mapped() becoming true.
*/
-struct page *dax_layout_busy_page(struct address_space *mapping)
+struct page *dax_layout_busy_page_range(struct address_space *mapping,
+ loff_t start, loff_t end)
{
- XA_STATE(xas, &mapping->i_pages, 0);
void *entry;
unsigned int scanned = 0;
struct page *page = NULL;
+ pgoff_t start_idx = start >> PAGE_SHIFT;
+ pgoff_t end_idx;
+ XA_STATE(xas, &mapping->i_pages, start_idx);
/*
* In the 'limited' case get_user_pages() for dax is disabled.
@@ -589,6 +595,11 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
if (!dax_mapping(mapping) || !mapping_mapped(mapping))
return NULL;
+ /* If end == LLONG_MAX, all pages from start to till end of file */
+ if (end == LLONG_MAX)
+ end_idx = ULONG_MAX;
+ else
+ end_idx = end >> PAGE_SHIFT;
/*
* If we race get_user_pages_fast() here either we'll see the
* elevated page count in the iteration and wait, or
@@ -596,15 +607,15 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
* against is no longer mapped in the page tables and bail to the
* get_user_pages() slow path. The slow path is protected by
* pte_lock() and pmd_lock(). New references are not taken without
- * holding those locks, and unmap_mapping_range() will not zero the
+ * holding those locks, and unmap_mapping_pages() will not zero the
* pte or pmd without holding the respective lock, so we are
* guaranteed to either see new references or prevent new
* references from being established.
*/
- unmap_mapping_range(mapping, 0, 0, 0);
+ unmap_mapping_pages(mapping, start_idx, end_idx - start_idx + 1, 0);
xas_lock_irq(&xas);
- xas_for_each(&xas, entry, ULONG_MAX) {
+ xas_for_each(&xas, entry, end_idx) {
if (WARN_ON_ONCE(!xa_is_value(entry)))
continue;
if (unlikely(dax_is_locked(entry)))
@@ -625,6 +636,12 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
xas_unlock_irq(&xas);
return page;
}
+EXPORT_SYMBOL_GPL(dax_layout_busy_page_range);
+
+struct page *dax_layout_busy_page(struct address_space *mapping)
+{
+ return dax_layout_busy_page_range(mapping, 0, LLONG_MAX);
+}
EXPORT_SYMBOL_GPL(dax_layout_busy_page);
static int __dax_invalidate_entry(struct address_space *mapping,
diff --git a/fs/exec.c b/fs/exec.c
index 3f83e94bb0ba..547a2390baf5 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1001,11 +1001,24 @@ static int exec_mmap(struct mm_struct *mm)
}
task_lock(tsk);
- active_mm = tsk->active_mm;
membarrier_exec_mmap(mm);
- tsk->mm = mm;
+
+ local_irq_disable();
+ active_mm = tsk->active_mm;
tsk->active_mm = mm;
+ tsk->mm = mm;
+ /*
+ * This prevents preemption while active_mm is being loaded and
+ * it and mm are being updated, which could cause problems for
+ * lazy tlb mm refcounting when these are updated by context
+ * switches. Not all architectures can handle irqs off over
+ * activate_mm yet.
+ */
+ if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
+ local_irq_enable();
activate_mm(active_mm, mm);
+ if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
+ local_irq_enable();
tsk->mm->vmacache_seqnum = 0;
vmacache_flush(tsk);
task_unlock(tsk);
diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
index bbd5e7e0632b..5b7ba8f71153 100644
--- a/fs/ext4/verity.c
+++ b/fs/ext4/verity.c
@@ -349,6 +349,7 @@ static struct page *ext4_read_merkle_tree_page(struct inode *inode,
pgoff_t index,
unsigned long num_ra_pages)
{
+ DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, index);
struct page *page;
index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT;
@@ -358,8 +359,7 @@ static struct page *ext4_read_merkle_tree_page(struct inode *inode,
if (page)
put_page(page);
else if (num_ra_pages > 1)
- page_cache_readahead_unbounded(inode->i_mapping, NULL,
- index, num_ra_pages, 0);
+ page_cache_ra_unbounded(&ractl, num_ra_pages, 0);
page = read_mapping_page(inode->i_mapping, index, NULL);
}
return page;
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 217b290ae3a5..306413589827 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -160,7 +160,7 @@ static void *f2fs_acl_to_disk(struct f2fs_sb_info *sbi,
return (void *)f2fs_acl;
fail:
- kvfree(f2fs_acl);
+ kfree(f2fs_acl);
return ERR_PTR(-EINVAL);
}
@@ -190,7 +190,7 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type,
acl = NULL;
else
acl = ERR_PTR(retval);
- kvfree(value);
+ kfree(value);
return acl;
}
@@ -240,7 +240,7 @@ static int __f2fs_set_acl(struct inode *inode, int type,
error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0);
- kvfree(value);
+ kfree(value);
if (!error)
set_cached_acl(inode, type, acl);
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index ff807e14c891..023462e80e58 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -107,7 +107,7 @@ struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
return __get_meta_page(sbi, index, true);
}
-struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index)
+struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index)
{
struct page *page;
int count = 0;
@@ -243,6 +243,8 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
blkno * NAT_ENTRY_PER_BLOCK);
break;
case META_SIT:
+ if (unlikely(blkno >= TOTAL_SEGS(sbi)))
+ goto out;
/* get sit block addr */
fio.new_blkaddr = current_sit_addr(sbi,
blkno * SIT_ENTRY_PER_BLOCK);
@@ -1047,8 +1049,12 @@ int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
get_pages(sbi, is_dir ?
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
retry:
- if (unlikely(f2fs_cp_error(sbi)))
+ if (unlikely(f2fs_cp_error(sbi))) {
+ trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
+ get_pages(sbi, is_dir ?
+ F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
return -EIO;
+ }
spin_lock(&sbi->inode_lock[type]);
@@ -1619,11 +1625,16 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_flush_sit_entries(sbi, cpc);
+ /* save inmem log status */
+ f2fs_save_inmem_curseg(sbi);
+
err = do_checkpoint(sbi, cpc);
if (err)
f2fs_release_discard_addrs(sbi);
else
f2fs_clear_prefree_segments(sbi, cpc);
+
+ f2fs_restore_inmem_curseg(sbi);
stop:
unblock_operations(sbi);
stat_inc_cp_count(sbi->stat_info);
@@ -1654,7 +1665,7 @@ void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi)
}
sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
- NR_CURSEG_TYPE - __cp_payload(sbi)) *
+ NR_CURSEG_PERSIST_TYPE - __cp_payload(sbi)) *
F2FS_ORPHANS_PER_BLOCK;
}
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 1dfb126a0cb2..14262e0f1cd6 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -17,6 +17,33 @@
#include "node.h"
#include <trace/events/f2fs.h>
+static struct kmem_cache *cic_entry_slab;
+static struct kmem_cache *dic_entry_slab;
+
+static void *page_array_alloc(struct inode *inode, int nr)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ unsigned int size = sizeof(struct page *) * nr;
+
+ if (likely(size <= sbi->page_array_slab_size))
+ return kmem_cache_zalloc(sbi->page_array_slab, GFP_NOFS);
+ return f2fs_kzalloc(sbi, size, GFP_NOFS);
+}
+
+static void page_array_free(struct inode *inode, void *pages, int nr)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ unsigned int size = sizeof(struct page *) * nr;
+
+ if (!pages)
+ return;
+
+ if (likely(size <= sbi->page_array_slab_size))
+ kmem_cache_free(sbi->page_array_slab, pages);
+ else
+ kfree(pages);
+}
+
struct f2fs_compress_ops {
int (*init_compress_ctx)(struct compress_ctx *cc);
void (*destroy_compress_ctx)(struct compress_ctx *cc);
@@ -130,19 +157,16 @@ struct page *f2fs_compress_control_page(struct page *page)
int f2fs_init_compress_ctx(struct compress_ctx *cc)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode);
-
- if (cc->nr_rpages)
+ if (cc->rpages)
return 0;
- cc->rpages = f2fs_kzalloc(sbi, sizeof(struct page *) <<
- cc->log_cluster_size, GFP_NOFS);
+ cc->rpages = page_array_alloc(cc->inode, cc->cluster_size);
return cc->rpages ? 0 : -ENOMEM;
}
void f2fs_destroy_compress_ctx(struct compress_ctx *cc)
{
- kfree(cc->rpages);
+ page_array_free(cc->inode, cc->rpages, cc->cluster_size);
cc->rpages = NULL;
cc->nr_rpages = 0;
cc->nr_cpages = 0;
@@ -382,16 +406,17 @@ static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic)
ZSTD_DStream *stream;
void *workspace;
unsigned int workspace_size;
+ unsigned int max_window_size =
+ MAX_COMPRESS_WINDOW_SIZE(dic->log_cluster_size);
- workspace_size = ZSTD_DStreamWorkspaceBound(MAX_COMPRESS_WINDOW_SIZE);
+ workspace_size = ZSTD_DStreamWorkspaceBound(max_window_size);
workspace = f2fs_kvmalloc(F2FS_I_SB(dic->inode),
workspace_size, GFP_NOFS);
if (!workspace)
return -ENOMEM;
- stream = ZSTD_initDStream(MAX_COMPRESS_WINDOW_SIZE,
- workspace, workspace_size);
+ stream = ZSTD_initDStream(max_window_size, workspace, workspace_size);
if (!stream) {
printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_initDStream failed\n",
KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id,
@@ -554,13 +579,29 @@ static void f2fs_compress_free_page(struct page *page)
mempool_free(page, compress_page_pool);
}
+#define MAX_VMAP_RETRIES 3
+
+static void *f2fs_vmap(struct page **pages, unsigned int count)
+{
+ int i;
+ void *buf = NULL;
+
+ for (i = 0; i < MAX_VMAP_RETRIES; i++) {
+ buf = vm_map_ram(pages, count, -1);
+ if (buf)
+ break;
+ vm_unmap_aliases();
+ }
+ return buf;
+}
+
static int f2fs_compress_pages(struct compress_ctx *cc)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode);
struct f2fs_inode_info *fi = F2FS_I(cc->inode);
const struct f2fs_compress_ops *cops =
f2fs_cops[fi->i_compress_algorithm];
- unsigned int max_len, nr_cpages;
+ unsigned int max_len, new_nr_cpages;
+ struct page **new_cpages;
int i, ret;
trace_f2fs_compress_pages_start(cc->inode, cc->cluster_idx,
@@ -575,8 +616,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
max_len = COMPRESS_HEADER_SIZE + cc->clen;
cc->nr_cpages = DIV_ROUND_UP(max_len, PAGE_SIZE);
- cc->cpages = f2fs_kzalloc(sbi, sizeof(struct page *) *
- cc->nr_cpages, GFP_NOFS);
+ cc->cpages = page_array_alloc(cc->inode, cc->nr_cpages);
if (!cc->cpages) {
ret = -ENOMEM;
goto destroy_compress_ctx;
@@ -590,13 +630,13 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
}
}
- cc->rbuf = vmap(cc->rpages, cc->cluster_size, VM_MAP, PAGE_KERNEL_RO);
+ cc->rbuf = f2fs_vmap(cc->rpages, cc->cluster_size);
if (!cc->rbuf) {
ret = -ENOMEM;
goto out_free_cpages;
}
- cc->cbuf = vmap(cc->cpages, cc->nr_cpages, VM_MAP, PAGE_KERNEL);
+ cc->cbuf = f2fs_vmap(cc->cpages, cc->nr_cpages);
if (!cc->cbuf) {
ret = -ENOMEM;
goto out_vunmap_rbuf;
@@ -618,16 +658,28 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
for (i = 0; i < COMPRESS_DATA_RESERVED_SIZE; i++)
cc->cbuf->reserved[i] = cpu_to_le32(0);
- nr_cpages = DIV_ROUND_UP(cc->clen + COMPRESS_HEADER_SIZE, PAGE_SIZE);
+ new_nr_cpages = DIV_ROUND_UP(cc->clen + COMPRESS_HEADER_SIZE, PAGE_SIZE);
+
+ /* Now we're going to cut unnecessary tail pages */
+ new_cpages = page_array_alloc(cc->inode, new_nr_cpages);
+ if (!new_cpages) {
+ ret = -ENOMEM;
+ goto out_vunmap_cbuf;
+ }
/* zero out any unused part of the last page */
memset(&cc->cbuf->cdata[cc->clen], 0,
- (nr_cpages * PAGE_SIZE) - (cc->clen + COMPRESS_HEADER_SIZE));
+ (new_nr_cpages * PAGE_SIZE) -
+ (cc->clen + COMPRESS_HEADER_SIZE));
- vunmap(cc->cbuf);
- vunmap(cc->rbuf);
+ vm_unmap_ram(cc->cbuf, cc->nr_cpages);
+ vm_unmap_ram(cc->rbuf, cc->cluster_size);
- for (i = nr_cpages; i < cc->nr_cpages; i++) {
+ for (i = 0; i < cc->nr_cpages; i++) {
+ if (i < new_nr_cpages) {
+ new_cpages[i] = cc->cpages[i];
+ continue;
+ }
f2fs_compress_free_page(cc->cpages[i]);
cc->cpages[i] = NULL;
}
@@ -635,22 +687,24 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
if (cops->destroy_compress_ctx)
cops->destroy_compress_ctx(cc);
- cc->nr_cpages = nr_cpages;
+ page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
+ cc->cpages = new_cpages;
+ cc->nr_cpages = new_nr_cpages;
trace_f2fs_compress_pages_end(cc->inode, cc->cluster_idx,
cc->clen, ret);
return 0;
out_vunmap_cbuf:
- vunmap(cc->cbuf);
+ vm_unmap_ram(cc->cbuf, cc->nr_cpages);
out_vunmap_rbuf:
- vunmap(cc->rbuf);
+ vm_unmap_ram(cc->rbuf, cc->cluster_size);
out_free_cpages:
for (i = 0; i < cc->nr_cpages; i++) {
if (cc->cpages[i])
f2fs_compress_free_page(cc->cpages[i]);
}
- kfree(cc->cpages);
+ page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
cc->cpages = NULL;
destroy_compress_ctx:
if (cops->destroy_compress_ctx)
@@ -677,7 +731,7 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
if (bio->bi_status || PageError(page))
dic->failed = true;
- if (refcount_dec_not_one(&dic->ref))
+ if (atomic_dec_return(&dic->pending_pages))
return;
trace_f2fs_decompress_pages_start(dic->inode, dic->cluster_idx,
@@ -689,8 +743,7 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
goto out_free_dic;
}
- dic->tpages = f2fs_kzalloc(sbi, sizeof(struct page *) *
- dic->cluster_size, GFP_NOFS);
+ dic->tpages = page_array_alloc(dic->inode, dic->cluster_size);
if (!dic->tpages) {
ret = -ENOMEM;
goto out_free_dic;
@@ -715,13 +768,13 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
goto out_free_dic;
}
- dic->rbuf = vmap(dic->tpages, dic->cluster_size, VM_MAP, PAGE_KERNEL);
+ dic->rbuf = f2fs_vmap(dic->tpages, dic->cluster_size);
if (!dic->rbuf) {
ret = -ENOMEM;
goto destroy_decompress_ctx;
}
- dic->cbuf = vmap(dic->cpages, dic->nr_cpages, VM_MAP, PAGE_KERNEL_RO);
+ dic->cbuf = f2fs_vmap(dic->cpages, dic->nr_cpages);
if (!dic->cbuf) {
ret = -ENOMEM;
goto out_vunmap_rbuf;
@@ -738,15 +791,15 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
ret = cops->decompress_pages(dic);
out_vunmap_cbuf:
- vunmap(dic->cbuf);
+ vm_unmap_ram(dic->cbuf, dic->nr_cpages);
out_vunmap_rbuf:
- vunmap(dic->rbuf);
+ vm_unmap_ram(dic->rbuf, dic->cluster_size);
destroy_decompress_ctx:
if (cops->destroy_decompress_ctx)
cops->destroy_decompress_ctx(dic);
out_free_dic:
if (verity)
- refcount_set(&dic->ref, dic->nr_cpages);
+ atomic_set(&dic->pending_pages, dic->nr_cpages);
if (!verity)
f2fs_decompress_end_io(dic->rpages, dic->cluster_size,
ret, false);
@@ -1029,6 +1082,7 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata,
{
struct compress_ctx cc = {
+ .inode = inode,
.log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
.cluster_size = F2FS_I(inode)->i_cluster_size,
.rpages = fsdata,
@@ -1132,7 +1186,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
*/
down_read(&sbi->node_write);
} else if (!f2fs_trylock_op(sbi)) {
- return -EAGAIN;
+ goto out_free;
}
set_new_dnode(&dn, cc->inode, NULL, NULL, 0);
@@ -1155,15 +1209,14 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
fio.version = ni.version;
- cic = f2fs_kzalloc(sbi, sizeof(struct compress_io_ctx), GFP_NOFS);
+ cic = kmem_cache_zalloc(cic_entry_slab, GFP_NOFS);
if (!cic)
goto out_put_dnode;
cic->magic = F2FS_COMPRESSED_PAGE_MAGIC;
cic->inode = inode;
- refcount_set(&cic->ref, cc->nr_cpages);
- cic->rpages = f2fs_kzalloc(sbi, sizeof(struct page *) <<
- cc->log_cluster_size, GFP_NOFS);
+ atomic_set(&cic->pending_pages, cc->nr_cpages);
+ cic->rpages = page_array_alloc(cc->inode, cc->cluster_size);
if (!cic->rpages)
goto out_put_cic;
@@ -1257,11 +1310,13 @@ unlock_continue:
spin_unlock(&fi->i_size_lock);
f2fs_put_rpages(cc);
+ page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
+ cc->cpages = NULL;
f2fs_destroy_compress_ctx(cc);
return 0;
out_destroy_crypt:
- kfree(cic->rpages);
+ page_array_free(cc->inode, cic->rpages, cc->cluster_size);
for (--i; i >= 0; i--)
fscrypt_finalize_bounce_page(&cc->cpages[i]);
@@ -1271,7 +1326,7 @@ out_destroy_crypt:
f2fs_put_page(cc->cpages[i], 1);
}
out_put_cic:
- kfree(cic);
+ kmem_cache_free(cic_entry_slab, cic);
out_put_dnode:
f2fs_put_dnode(&dn);
out_unlock_op:
@@ -1279,6 +1334,9 @@ out_unlock_op:
up_read(&sbi->node_write);
else
f2fs_unlock_op(sbi);
+out_free:
+ page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
+ cc->cpages = NULL;
return -EAGAIN;
}
@@ -1296,7 +1354,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
dec_page_count(sbi, F2FS_WB_DATA);
- if (refcount_dec_not_one(&cic->ref))
+ if (atomic_dec_return(&cic->pending_pages))
return;
for (i = 0; i < cic->nr_rpages; i++) {
@@ -1305,8 +1363,8 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
end_page_writeback(cic->rpages[i]);
}
- kfree(cic->rpages);
- kfree(cic);
+ page_array_free(cic->inode, cic->rpages, cic->nr_rpages);
+ kmem_cache_free(cic_entry_slab, cic);
}
static int f2fs_write_raw_pages(struct compress_ctx *cc,
@@ -1388,9 +1446,6 @@ int f2fs_write_multi_pages(struct compress_ctx *cc,
struct writeback_control *wbc,
enum iostat_type io_type)
{
- struct f2fs_inode_info *fi = F2FS_I(cc->inode);
- const struct f2fs_compress_ops *cops =
- f2fs_cops[fi->i_compress_algorithm];
int err;
*submitted = 0;
@@ -1405,9 +1460,6 @@ int f2fs_write_multi_pages(struct compress_ctx *cc,
err = f2fs_write_compressed_pages(cc, submitted,
wbc, io_type);
- cops->destroy_compress_ctx(cc);
- kfree(cc->cpages);
- cc->cpages = NULL;
if (!err)
return 0;
f2fs_bug_on(F2FS_I_SB(cc->inode), err != -EAGAIN);
@@ -1424,25 +1476,23 @@ destroy_out:
struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode);
struct decompress_io_ctx *dic;
pgoff_t start_idx = start_idx_of_cluster(cc);
int i;
- dic = f2fs_kzalloc(sbi, sizeof(struct decompress_io_ctx), GFP_NOFS);
+ dic = kmem_cache_zalloc(dic_entry_slab, GFP_NOFS);
if (!dic)
return ERR_PTR(-ENOMEM);
- dic->rpages = f2fs_kzalloc(sbi, sizeof(struct page *) <<
- cc->log_cluster_size, GFP_NOFS);
+ dic->rpages = page_array_alloc(cc->inode, cc->cluster_size);
if (!dic->rpages) {
- kfree(dic);
+ kmem_cache_free(dic_entry_slab, dic);
return ERR_PTR(-ENOMEM);
}
dic->magic = F2FS_COMPRESSED_PAGE_MAGIC;
dic->inode = cc->inode;
- refcount_set(&dic->ref, cc->nr_cpages);
+ atomic_set(&dic->pending_pages, cc->nr_cpages);
dic->cluster_idx = cc->cluster_idx;
dic->cluster_size = cc->cluster_size;
dic->log_cluster_size = cc->log_cluster_size;
@@ -1453,8 +1503,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
dic->rpages[i] = cc->rpages[i];
dic->nr_rpages = cc->cluster_size;
- dic->cpages = f2fs_kzalloc(sbi, sizeof(struct page *) *
- dic->nr_cpages, GFP_NOFS);
+ dic->cpages = page_array_alloc(dic->inode, dic->nr_cpages);
if (!dic->cpages)
goto out_free;
@@ -1489,7 +1538,7 @@ void f2fs_free_dic(struct decompress_io_ctx *dic)
continue;
f2fs_compress_free_page(dic->tpages[i]);
}
- kfree(dic->tpages);
+ page_array_free(dic->inode, dic->tpages, dic->cluster_size);
}
if (dic->cpages) {
@@ -1498,11 +1547,11 @@ void f2fs_free_dic(struct decompress_io_ctx *dic)
continue;
f2fs_compress_free_page(dic->cpages[i]);
}
- kfree(dic->cpages);
+ page_array_free(dic->inode, dic->cpages, dic->nr_cpages);
}
- kfree(dic->rpages);
- kfree(dic);
+ page_array_free(dic->inode, dic->rpages, dic->nr_rpages);
+ kmem_cache_free(dic_entry_slab, dic);
}
void f2fs_decompress_end_io(struct page **rpages,
@@ -1530,3 +1579,76 @@ unlock:
unlock_page(rpage);
}
}
+
+int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi)
+{
+ dev_t dev = sbi->sb->s_bdev->bd_dev;
+ char slab_name[32];
+
+ sprintf(slab_name, "f2fs_page_array_entry-%u:%u", MAJOR(dev), MINOR(dev));
+
+ sbi->page_array_slab_size = sizeof(struct page *) <<
+ F2FS_OPTION(sbi).compress_log_size;
+
+ sbi->page_array_slab = f2fs_kmem_cache_create(slab_name,
+ sbi->page_array_slab_size);
+ if (!sbi->page_array_slab)
+ return -ENOMEM;
+ return 0;
+}
+
+void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi)
+{
+ kmem_cache_destroy(sbi->page_array_slab);
+}
+
+static int __init f2fs_init_cic_cache(void)
+{
+ cic_entry_slab = f2fs_kmem_cache_create("f2fs_cic_entry",
+ sizeof(struct compress_io_ctx));
+ if (!cic_entry_slab)
+ return -ENOMEM;
+ return 0;
+}
+
+static void f2fs_destroy_cic_cache(void)
+{
+ kmem_cache_destroy(cic_entry_slab);
+}
+
+static int __init f2fs_init_dic_cache(void)
+{
+ dic_entry_slab = f2fs_kmem_cache_create("f2fs_dic_entry",
+ sizeof(struct decompress_io_ctx));
+ if (!dic_entry_slab)
+ return -ENOMEM;
+ return 0;
+}
+
+static void f2fs_destroy_dic_cache(void)
+{
+ kmem_cache_destroy(dic_entry_slab);
+}
+
+int __init f2fs_init_compress_cache(void)
+{
+ int err;
+
+ err = f2fs_init_cic_cache();
+ if (err)
+ goto out;
+ err = f2fs_init_dic_cache();
+ if (err)
+ goto free_cic;
+ return 0;
+free_cic:
+ f2fs_destroy_cic_cache();
+out:
+ return -ENOMEM;
+}
+
+void f2fs_destroy_compress_cache(void)
+{
+ f2fs_destroy_dic_cache();
+ f2fs_destroy_cic_cache();
+}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 73683e58a08d..be4da52604ed 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -202,7 +202,7 @@ static void f2fs_verify_bio(struct bio *bio)
dic = (struct decompress_io_ctx *)page_private(page);
if (dic) {
- if (refcount_dec_not_one(&dic->ref))
+ if (atomic_dec_return(&dic->pending_pages))
continue;
f2fs_verify_pages(dic->rpages,
dic->cluster_size);
@@ -517,7 +517,7 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
zero_user_segment(page, 0, PAGE_SIZE);
SetPagePrivate(page);
- set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
+ set_page_private(page, DUMMY_WRITTEN_PAGE);
lock_page(page);
if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
f2fs_bug_on(sbi, 1);
@@ -1416,7 +1416,7 @@ alloc:
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
old_blkaddr = dn->data_blkaddr;
f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
- &sum, seg_type, NULL);
+ &sum, seg_type, NULL);
if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
invalidate_mapping_pages(META_MAPPING(sbi),
old_blkaddr, old_blkaddr);
@@ -1803,10 +1803,6 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock,
static int get_data_block_bmap(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
- /* Block number less than F2FS MAX BLOCKS */
- if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks))
- return -EFBIG;
-
return __get_data_block(inode, iblock, bh_result, create,
F2FS_GET_BLOCK_BMAP, NULL,
NO_CHECK_TYPE, create);
@@ -2272,8 +2268,8 @@ submit_and_realloc:
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
dic->failed = true;
- if (refcount_sub_and_test(dic->nr_cpages - i,
- &dic->ref)) {
+ if (!atomic_sub_return(dic->nr_cpages - i,
+ &dic->pending_pages)) {
f2fs_decompress_end_io(dic->rpages,
cc->cluster_size, true,
false);
@@ -3133,6 +3129,8 @@ next:
retry = 0;
}
}
+ if (f2fs_compressed_file(inode))
+ f2fs_destroy_compress_ctx(&cc);
#endif
if (retry) {
index = 0;
@@ -3574,7 +3572,7 @@ static void f2fs_dio_end_io(struct bio *bio)
bio->bi_private = dio->orig_private;
bio->bi_end_io = dio->orig_end_io;
- kvfree(dio);
+ kfree(dio);
bio_endio(bio);
}
@@ -3673,12 +3671,18 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
err);
if (!do_opu)
set_inode_flag(inode, FI_UPDATE_WRITE);
+ } else if (err == -EIOCBQUEUED) {
+ f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
+ count - iov_iter_count(iter));
} else if (err < 0) {
f2fs_write_failed(mapping, offset + count);
}
} else {
if (err > 0)
f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err);
+ else if (err == -EIOCBQUEUED)
+ f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO,
+ count - iov_iter_count(iter));
}
out:
@@ -3807,11 +3811,16 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
filemap_write_and_wait(mapping);
- if (f2fs_compressed_file(inode))
- blknr = f2fs_bmap_compress(inode, block);
+ /* Block number less than F2FS MAX BLOCKS */
+ if (unlikely(block >= F2FS_I_SB(inode)->max_file_blocks))
+ goto out;
- if (!get_data_block_bmap(inode, block, &tmp, 0))
- blknr = tmp.b_blocknr;
+ if (f2fs_compressed_file(inode)) {
+ blknr = f2fs_bmap_compress(inode, block);
+ } else {
+ if (!get_data_block_bmap(inode, block, &tmp, 0))
+ blknr = tmp.b_blocknr;
+ }
out:
trace_f2fs_bmap(inode, block, blknr);
return blknr;
@@ -3874,6 +3883,83 @@ int f2fs_migrate_page(struct address_space *mapping,
#endif
#ifdef CONFIG_SWAP
+static int check_swap_activate_fast(struct swap_info_struct *sis,
+ struct file *swap_file, sector_t *span)
+{
+ struct address_space *mapping = swap_file->f_mapping;
+ struct inode *inode = mapping->host;
+ sector_t cur_lblock;
+ sector_t last_lblock;
+ sector_t pblock;
+ sector_t lowest_pblock = -1;
+ sector_t highest_pblock = 0;
+ int nr_extents = 0;
+ unsigned long nr_pblocks;
+ unsigned long len;
+ int ret;
+
+ /*
+ * Map all the blocks into the extent list. This code doesn't try
+ * to be very smart.
+ */
+ cur_lblock = 0;
+ last_lblock = logical_to_blk(inode, i_size_read(inode));
+ len = i_size_read(inode);
+
+ while (cur_lblock <= last_lblock && cur_lblock < sis->max) {
+ struct buffer_head map_bh;
+ pgoff_t next_pgofs;
+
+ cond_resched();
+
+ memset(&map_bh, 0, sizeof(struct buffer_head));
+ map_bh.b_size = len - cur_lblock;
+
+ ret = get_data_block(inode, cur_lblock, &map_bh, 0,
+ F2FS_GET_BLOCK_FIEMAP, &next_pgofs);
+ if (ret)
+ goto err_out;
+
+ /* hole */
+ if (!buffer_mapped(&map_bh))
+ goto err_out;
+
+ pblock = map_bh.b_blocknr;
+ nr_pblocks = logical_to_blk(inode, map_bh.b_size);
+
+ if (cur_lblock + nr_pblocks >= sis->max)
+ nr_pblocks = sis->max - cur_lblock;
+
+ if (cur_lblock) { /* exclude the header page */
+ if (pblock < lowest_pblock)
+ lowest_pblock = pblock;
+ if (pblock + nr_pblocks - 1 > highest_pblock)
+ highest_pblock = pblock + nr_pblocks - 1;
+ }
+
+ /*
+ * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
+ */
+ ret = add_swap_extent(sis, cur_lblock, nr_pblocks, pblock);
+ if (ret < 0)
+ goto out;
+ nr_extents += ret;
+ cur_lblock += nr_pblocks;
+ }
+ ret = nr_extents;
+ *span = 1 + highest_pblock - lowest_pblock;
+ if (cur_lblock == 0)
+ cur_lblock = 1; /* force Empty message */
+ sis->max = cur_lblock;
+ sis->pages = cur_lblock - 1;
+ sis->highest_bit = cur_lblock - 1;
+out:
+ return ret;
+err_out:
+ pr_err("swapon: swapfile has holes\n");
+ return -EINVAL;
+}
+
/* Copied from generic_swapfile_activate() to check any holes */
static int check_swap_activate(struct swap_info_struct *sis,
struct file *swap_file, sector_t *span)
@@ -3890,6 +3976,9 @@ static int check_swap_activate(struct swap_info_struct *sis,
int nr_extents = 0;
int ret;
+ if (PAGE_SIZE == F2FS_BLKSIZE)
+ return check_swap_activate_fast(sis, swap_file, span);
+
blkbits = inode->i_blkbits;
blocks_per_page = PAGE_SIZE >> blkbits;
@@ -3989,7 +4078,7 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
if (ret)
return ret;
- if (f2fs_disable_compressed_file(inode))
+ if (!f2fs_disable_compressed_file(inode))
return -EINVAL;
ret = check_swap_activate(sis, file, span);
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 4276c0f79beb..a8357fd4f5fa 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -131,7 +131,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->inline_inode = atomic_read(&sbi->inline_inode);
si->inline_dir = atomic_read(&sbi->inline_dir);
si->compr_inode = atomic_read(&sbi->compr_inode);
- si->compr_blocks = atomic_read(&sbi->compr_blocks);
+ si->compr_blocks = atomic64_read(&sbi->compr_blocks);
si->append = sbi->im[APPEND_INO].ino_num;
si->update = sbi->im[UPDATE_INO].ino_num;
si->orphans = sbi->im[ORPHAN_INO].ino_num;
@@ -164,7 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
/ 2;
si->util_invalid = 50 - si->util_free - si->util_valid;
- for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
+ for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
struct curseg_info *curseg = CURSEG_I(sbi, i);
si->curseg[i] = curseg->segno;
si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno);
@@ -342,7 +342,7 @@ static int stat_show(struct seq_file *s, void *v)
si->inline_inode);
seq_printf(s, " - Inline_dentry Inode: %u\n",
si->inline_dir);
- seq_printf(s, " - Compressed Inode: %u, Blocks: %u\n",
+ seq_printf(s, " - Compressed Inode: %u, Blocks: %llu\n",
si->compr_inode, si->compr_blocks);
seq_printf(s, " - Orphan/Append/Update Inode: %u, %u, %u\n",
si->orphans, si->append, si->update);
@@ -393,6 +393,14 @@ static int stat_show(struct seq_file *s, void *v)
si->dirty_seg[CURSEG_COLD_NODE],
si->full_seg[CURSEG_COLD_NODE],
si->valid_blks[CURSEG_COLD_NODE]);
+ seq_printf(s, " - Pinned file: %8d %8d %8d\n",
+ si->curseg[CURSEG_COLD_DATA_PINNED],
+ si->cursec[CURSEG_COLD_DATA_PINNED],
+ si->curzone[CURSEG_COLD_DATA_PINNED]);
+ seq_printf(s, " - ATGC data: %8d %8d %8d\n",
+ si->curseg[CURSEG_ALL_DATA_ATGC],
+ si->cursec[CURSEG_ALL_DATA_ATGC],
+ si->curzone[CURSEG_ALL_DATA_ATGC]);
seq_printf(s, "\n - Valid: %d\n - Dirty: %d\n",
si->main_area_segs - si->dirty_count -
si->prefree_count - si->free_segs,
@@ -542,7 +550,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
atomic_set(&sbi->inline_inode, 0);
atomic_set(&sbi->inline_dir, 0);
atomic_set(&sbi->compr_inode, 0);
- atomic_set(&sbi->compr_blocks, 0);
+ atomic64_set(&sbi->compr_blocks, 0);
atomic_set(&sbi->inplace_count, 0);
for (i = META_CP; i < META_MAX; i++)
atomic_set(&sbi->meta_count[i], 0);
@@ -566,7 +574,7 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
list_del(&si->stat_list);
mutex_unlock(&f2fs_stat_mutex);
- kvfree(si);
+ kfree(si);
}
void __init f2fs_create_root_stats(void)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 53fbc4dd6e48..4b9ef8bbfa4a 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -75,21 +75,22 @@ int f2fs_init_casefolded_name(const struct inode *dir,
struct f2fs_filename *fname)
{
#ifdef CONFIG_UNICODE
- struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+ struct super_block *sb = dir->i_sb;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
if (IS_CASEFOLDED(dir)) {
fname->cf_name.name = f2fs_kmalloc(sbi, F2FS_NAME_LEN,
GFP_NOFS);
if (!fname->cf_name.name)
return -ENOMEM;
- fname->cf_name.len = utf8_casefold(sbi->s_encoding,
+ fname->cf_name.len = utf8_casefold(sb->s_encoding,
fname->usr_fname,
fname->cf_name.name,
F2FS_NAME_LEN);
if ((int)fname->cf_name.len <= 0) {
kfree(fname->cf_name.name);
fname->cf_name.name = NULL;
- if (f2fs_has_strict_mode(sbi))
+ if (sb_has_strict_encoding(sb))
return -EINVAL;
/* fall back to treating name as opaque byte sequence */
}
@@ -190,21 +191,15 @@ static unsigned long dir_block_index(unsigned int level,
static struct f2fs_dir_entry *find_in_block(struct inode *dir,
struct page *dentry_page,
const struct f2fs_filename *fname,
- int *max_slots,
- struct page **res_page)
+ int *max_slots)
{
struct f2fs_dentry_block *dentry_blk;
- struct f2fs_dir_entry *de;
struct f2fs_dentry_ptr d;
dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page);
make_dentry_ptr_block(dir, &d, dentry_blk);
- de = f2fs_find_target_dentry(&d, fname, max_slots);
- if (de)
- *res_page = dentry_page;
-
- return de;
+ return f2fs_find_target_dentry(&d, fname, max_slots);
}
#ifdef CONFIG_UNICODE
@@ -215,8 +210,8 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr *name,
const u8 *de_name, u32 de_name_len)
{
- const struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
- const struct unicode_map *um = sbi->s_encoding;
+ const struct super_block *sb = dir->i_sb;
+ const struct unicode_map *um = sb->s_encoding;
struct qstr entry = QSTR_INIT(de_name, de_name_len);
int res;
@@ -226,7 +221,7 @@ static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr *name,
* In strict mode, ignore invalid names. In non-strict mode,
* fall back to treating them as opaque byte sequences.
*/
- if (f2fs_has_strict_mode(sbi) || name->len != entry.len)
+ if (sb_has_strict_encoding(sb) || name->len != entry.len)
return false;
return !memcmp(name->name, entry.name, name->len);
}
@@ -330,10 +325,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
}
}
- de = find_in_block(dir, dentry_page, fname, &max_slots,
- res_page);
- if (de)
+ de = find_in_block(dir, dentry_page, fname, &max_slots);
+ if (de) {
+ *res_page = dentry_page;
break;
+ }
if (max_slots >= s)
room = true;
@@ -357,16 +353,15 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
unsigned int max_depth;
unsigned int level;
+ *res_page = NULL;
+
if (f2fs_has_inline_dentry(dir)) {
- *res_page = NULL;
de = f2fs_find_in_inline_dir(dir, fname, res_page);
goto out;
}
- if (npages == 0) {
- *res_page = NULL;
+ if (npages == 0)
goto out;
- }
max_depth = F2FS_I(dir)->i_current_depth;
if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) {
@@ -377,7 +372,6 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
}
for (level = 0; level < max_depth; level++) {
- *res_page = NULL;
de = find_in_level(dir, level, fname, res_page);
if (de || IS_ERR(*res_page))
break;
@@ -1107,75 +1101,8 @@ const struct file_operations f2fs_dir_operations = {
};
#ifdef CONFIG_UNICODE
-static int f2fs_d_compare(const struct dentry *dentry, unsigned int len,
- const char *str, const struct qstr *name)
-{
- const struct dentry *parent = READ_ONCE(dentry->d_parent);
- const struct inode *dir = READ_ONCE(parent->d_inode);
- const struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
- struct qstr entry = QSTR_INIT(str, len);
- char strbuf[DNAME_INLINE_LEN];
- int res;
-
- if (!dir || !IS_CASEFOLDED(dir))
- goto fallback;
-
- /*
- * If the dentry name is stored in-line, then it may be concurrently
- * modified by a rename. If this happens, the VFS will eventually retry
- * the lookup, so it doesn't matter what ->d_compare() returns.
- * However, it's unsafe to call utf8_strncasecmp() with an unstable
- * string. Therefore, we have to copy the name into a temporary buffer.
- */
- if (len <= DNAME_INLINE_LEN - 1) {
- memcpy(strbuf, str, len);
- strbuf[len] = 0;
- entry.name = strbuf;
- /* prevent compiler from optimizing out the temporary buffer */
- barrier();
- }
-
- res = utf8_strncasecmp(sbi->s_encoding, name, &entry);
- if (res >= 0)
- return res;
-
- if (f2fs_has_strict_mode(sbi))
- return -EINVAL;
-fallback:
- if (len != name->len)
- return 1;
- return !!memcmp(str, name->name, len);
-}
-
-static int f2fs_d_hash(const struct dentry *dentry, struct qstr *str)
-{
- struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
- const struct unicode_map *um = sbi->s_encoding;
- const struct inode *inode = READ_ONCE(dentry->d_inode);
- unsigned char *norm;
- int len, ret = 0;
-
- if (!inode || !IS_CASEFOLDED(inode))
- return 0;
-
- norm = f2fs_kmalloc(sbi, PATH_MAX, GFP_ATOMIC);
- if (!norm)
- return -ENOMEM;
-
- len = utf8_casefold(um, str, norm, PATH_MAX);
- if (len < 0) {
- if (f2fs_has_strict_mode(sbi))
- ret = -EINVAL;
- goto out;
- }
- str->hash = full_name_hash(dentry, norm, len);
-out:
- kvfree(norm);
- return ret;
-}
-
const struct dentry_operations f2fs_dentry_ops = {
- .d_hash = f2fs_d_hash,
- .d_compare = f2fs_d_compare,
+ .d_hash = generic_ci_d_hash,
+ .d_compare = generic_ci_d_compare,
};
#endif
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 686c68b98610..3ebf976a682d 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -58,6 +58,29 @@ struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root,
return re;
}
+struct rb_node **f2fs_lookup_rb_tree_ext(struct f2fs_sb_info *sbi,
+ struct rb_root_cached *root,
+ struct rb_node **parent,
+ unsigned long long key, bool *leftmost)
+{
+ struct rb_node **p = &root->rb_root.rb_node;
+ struct rb_entry *re;
+
+ while (*p) {
+ *parent = *p;
+ re = rb_entry(*parent, struct rb_entry, rb_node);
+
+ if (key < re->key) {
+ p = &(*p)->rb_left;
+ } else {
+ p = &(*p)->rb_right;
+ *leftmost = false;
+ }
+ }
+
+ return p;
+}
+
struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
struct rb_root_cached *root,
struct rb_node **parent,
@@ -166,7 +189,7 @@ lookup_neighbors:
}
bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
- struct rb_root_cached *root)
+ struct rb_root_cached *root, bool check_key)
{
#ifdef CONFIG_F2FS_CHECK_FS
struct rb_node *cur = rb_first_cached(root), *next;
@@ -183,13 +206,23 @@ bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
cur_re = rb_entry(cur, struct rb_entry, rb_node);
next_re = rb_entry(next, struct rb_entry, rb_node);
+ if (check_key) {
+ if (cur_re->key > next_re->key) {
+ f2fs_info(sbi, "inconsistent rbtree, "
+ "cur(%llu) next(%llu)",
+ cur_re->key, next_re->key);
+ return false;
+ }
+ goto next;
+ }
+
if (cur_re->ofs + cur_re->len > next_re->ofs) {
f2fs_info(sbi, "inconsistent rbtree, cur(%u, %u) next(%u, %u)",
cur_re->ofs, cur_re->len,
next_re->ofs, next_re->len);
return false;
}
-
+next:
cur = next;
}
#endif
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 7c089ff7ff94..cb700d797296 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -98,6 +98,7 @@ extern const char *f2fs_fault_name[FAULT_MAX];
#define F2FS_MOUNT_RESERVE_ROOT 0x01000000
#define F2FS_MOUNT_DISABLE_CHECKPOINT 0x02000000
#define F2FS_MOUNT_NORECOVERY 0x04000000
+#define F2FS_MOUNT_ATGC 0x08000000
#define F2FS_OPTION(sbi) ((sbi)->mount_opt)
#define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
@@ -612,8 +613,13 @@ enum {
struct rb_entry {
struct rb_node rb_node; /* rb node located in rb-tree */
- unsigned int ofs; /* start offset of the entry */
- unsigned int len; /* length of the entry */
+ union {
+ struct {
+ unsigned int ofs; /* start offset of the entry */
+ unsigned int len; /* length of the entry */
+ };
+ unsigned long long key; /* 64-bits key */
+ } __packed;
};
struct extent_info {
@@ -801,7 +807,7 @@ struct f2fs_inode_info {
struct timespec64 i_disk_time[4];/* inode disk times */
/* for file compress */
- u64 i_compr_blocks; /* # of compressed blocks */
+ atomic_t i_compr_blocks; /* # of compressed blocks */
unsigned char i_compress_algorithm; /* algorithm type */
unsigned char i_log_cluster_size; /* log of cluster size */
unsigned int i_cluster_size; /* cluster size */
@@ -973,7 +979,9 @@ static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
*/
#define NR_CURSEG_DATA_TYPE (3)
#define NR_CURSEG_NODE_TYPE (3)
-#define NR_CURSEG_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
+#define NR_CURSEG_INMEM_TYPE (2)
+#define NR_CURSEG_PERSIST_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
+#define NR_CURSEG_TYPE (NR_CURSEG_INMEM_TYPE + NR_CURSEG_PERSIST_TYPE)
enum {
CURSEG_HOT_DATA = 0, /* directory entry blocks */
@@ -982,8 +990,11 @@ enum {
CURSEG_HOT_NODE, /* direct node blocks of directory files */
CURSEG_WARM_NODE, /* direct node blocks of normal files */
CURSEG_COLD_NODE, /* indirect node blocks */
- NO_CHECK_TYPE,
- CURSEG_COLD_DATA_PINNED,/* cold data for pinned file */
+ NR_PERSISTENT_LOG, /* number of persistent log */
+ CURSEG_COLD_DATA_PINNED = NR_PERSISTENT_LOG,
+ /* pinned file that needs consecutive block address */
+ CURSEG_ALL_DATA_ATGC, /* SSR alloctor in hot/warm/cold data area */
+ NO_CHECK_TYPE, /* number of persistent & inmem log */
};
struct flush_cmd {
@@ -1209,6 +1220,7 @@ struct f2fs_dev_info {
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int nr_blkz; /* Total number of zones */
unsigned long *blkz_seq; /* Bitmap indicating sequential zones */
+ block_t *zone_capacity_blocks; /* Array of zone capacity in blks */
#endif
};
@@ -1228,6 +1240,18 @@ struct inode_management {
unsigned long ino_num; /* number of entries */
};
+/* for GC_AT */
+struct atgc_management {
+ bool atgc_enabled; /* ATGC is enabled or not */
+ struct rb_root_cached root; /* root of victim rb-tree */
+ struct list_head victim_list; /* linked with all victim entries */
+ unsigned int victim_count; /* victim count in rb-tree */
+ unsigned int candidate_ratio; /* candidate ratio */
+ unsigned int max_candidate_count; /* max candidate count */
+ unsigned int age_weight; /* age weight, vblock_weight = 100 - age_weight */
+ unsigned long long age_threshold; /* age threshold */
+};
+
/* For s_flag in struct f2fs_sb_info */
enum {
SBI_IS_DIRTY, /* dirty flag for checkpoint */
@@ -1260,6 +1284,7 @@ enum {
GC_NORMAL,
GC_IDLE_CB,
GC_IDLE_GREEDY,
+ GC_IDLE_AT,
GC_URGENT_HIGH,
GC_URGENT_LOW,
};
@@ -1303,9 +1328,9 @@ enum fsync_mode {
#define DUMMY_WRITTEN_PAGE ((unsigned long)-2)
#define IS_ATOMIC_WRITTEN_PAGE(page) \
- (page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE)
+ (page_private(page) == ATOMIC_WRITTEN_PAGE)
#define IS_DUMMY_WRITTEN_PAGE(page) \
- (page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE)
+ (page_private(page) == DUMMY_WRITTEN_PAGE)
#ifdef CONFIG_F2FS_IO_TRACE
#define IS_IO_TRACED_PAGE(page) \
@@ -1359,7 +1384,7 @@ struct compress_io_ctx {
struct inode *inode; /* inode the context belong to */
struct page **rpages; /* pages store raw data in cluster */
unsigned int nr_rpages; /* total page number in rpages */
- refcount_t ref; /* referrence count of raw page */
+ atomic_t pending_pages; /* in-flight compressed page count */
};
/* decompress io context for read IO path */
@@ -1378,7 +1403,7 @@ struct decompress_io_ctx {
struct compress_data *cbuf; /* virtual mapped address on cpages */
size_t rlen; /* valid data length in rbuf */
size_t clen; /* valid data length in cbuf */
- refcount_t ref; /* referrence count of compressed page */
+ atomic_t pending_pages; /* in-flight compressed page count */
bool failed; /* indicate IO error during decompression */
void *private; /* payload buffer for specified decompression algorithm */
void *private2; /* extra payload buffer */
@@ -1387,7 +1412,7 @@ struct decompress_io_ctx {
#define NULL_CLUSTER ((unsigned int)(~0))
#define MIN_COMPRESS_LOG_SIZE 2
#define MAX_COMPRESS_LOG_SIZE 8
-#define MAX_COMPRESS_WINDOW_SIZE ((PAGE_SIZE) << MAX_COMPRESS_LOG_SIZE)
+#define MAX_COMPRESS_WINDOW_SIZE(log_size) ((PAGE_SIZE) << (log_size))
struct f2fs_sb_info {
struct super_block *sb; /* pointer to VFS super block */
@@ -1397,10 +1422,6 @@ struct f2fs_sb_info {
int valid_super_block; /* valid super block no */
unsigned long s_flag; /* flags for sbi */
struct mutex writepages; /* mutex for writepages() */
-#ifdef CONFIG_UNICODE
- struct unicode_map *s_encoding;
- __u16 s_encoding_flags;
-#endif
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int blocks_per_blkz; /* F2FS blocks per zone */
@@ -1508,6 +1529,7 @@ struct f2fs_sb_info {
* race between GC and GC or CP
*/
struct f2fs_gc_kthread *gc_thread; /* GC thread */
+ struct atgc_management am; /* atgc management */
unsigned int cur_victim_sec; /* current victim section num */
unsigned int gc_mode; /* current GC state */
unsigned int next_victim_seg[2]; /* next segment in victim section */
@@ -1544,7 +1566,7 @@ struct f2fs_sb_info {
atomic_t inline_inode; /* # of inline_data inodes */
atomic_t inline_dir; /* # of inline_dentry inodes */
atomic_t compr_inode; /* # of compressed inodes */
- atomic_t compr_blocks; /* # of compressed blocks */
+ atomic64_t compr_blocks; /* # of compressed blocks */
atomic_t vw_cnt; /* # of volatile writes */
atomic_t max_aw_cnt; /* max # of atomic writes */
atomic_t max_vw_cnt; /* max # of volatile writes */
@@ -1593,6 +1615,11 @@ struct f2fs_sb_info {
struct kmem_cache *inline_xattr_slab; /* inline xattr entry */
unsigned int inline_xattr_slab_size; /* default inline xattr slab size */
+
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ struct kmem_cache *page_array_slab; /* page array entry */
+ unsigned int page_array_slab_size; /* default page array slab size */
+#endif
};
struct f2fs_private_dio {
@@ -3325,6 +3352,11 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi);
int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
+void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi);
+void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi);
+void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi);
+void f2fs_get_new_segment(struct f2fs_sb_info *sbi,
+ unsigned int *newseg, bool new_sec, int dir);
void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
unsigned int start, unsigned int end);
void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type);
@@ -3343,7 +3375,8 @@ void f2fs_outplace_write_data(struct dnode_of_data *dn,
int f2fs_inplace_write_data(struct f2fs_io_info *fio);
void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
block_t old_blkaddr, block_t new_blkaddr,
- bool recover_curseg, bool recover_newaddr);
+ bool recover_curseg, bool recover_newaddr,
+ bool from_gc);
void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
block_t old_addr, block_t new_addr,
unsigned char version, bool recover_curseg,
@@ -3371,6 +3404,10 @@ void f2fs_destroy_segment_manager_caches(void);
int f2fs_rw_hint_to_seg_type(enum rw_hint hint);
enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
enum page_type type, enum temp_type temp);
+unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
+ unsigned int segno);
+unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
+ unsigned int segno);
/*
* checkpoint.c
@@ -3378,7 +3415,7 @@ enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io);
struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
-struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index);
+struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type);
@@ -3486,6 +3523,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
unsigned int segno);
void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count);
+int __init f2fs_create_garbage_collection_cache(void);
+void f2fs_destroy_garbage_collection_cache(void);
/*
* recovery.c
@@ -3521,7 +3560,8 @@ struct f2fs_stat_info {
int nr_discard_cmd;
unsigned int undiscard_blks;
int inline_xattr, inline_inode, inline_dir, append, update, orphans;
- int compr_inode, compr_blocks;
+ int compr_inode;
+ unsigned long long compr_blocks;
int aw_cnt, max_aw_cnt, vw_cnt, max_vw_cnt;
unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks;
unsigned int bimodal, avg_vblocks;
@@ -3606,9 +3646,9 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
(atomic_dec(&F2FS_I_SB(inode)->compr_inode)); \
} while (0)
#define stat_add_compr_blocks(inode, blocks) \
- (atomic_add(blocks, &F2FS_I_SB(inode)->compr_blocks))
+ (atomic64_add(blocks, &F2FS_I_SB(inode)->compr_blocks))
#define stat_sub_compr_blocks(inode, blocks) \
- (atomic_sub(blocks, &F2FS_I_SB(inode)->compr_blocks))
+ (atomic64_sub(blocks, &F2FS_I_SB(inode)->compr_blocks))
#define stat_inc_meta_count(sbi, blkaddr) \
do { \
if (blkaddr < SIT_I(sbi)->sit_base_addr) \
@@ -3787,6 +3827,10 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
*/
struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root,
struct rb_entry *cached_re, unsigned int ofs);
+struct rb_node **f2fs_lookup_rb_tree_ext(struct f2fs_sb_info *sbi,
+ struct rb_root_cached *root,
+ struct rb_node **parent,
+ unsigned long long key, bool *left_most);
struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
struct rb_root_cached *root,
struct rb_node **parent,
@@ -3797,7 +3841,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
struct rb_node ***insert_p, struct rb_node **insert_parent,
bool force, bool *leftmost);
bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
- struct rb_root_cached *root);
+ struct rb_root_cached *root, bool check_key);
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
void f2fs_init_extent_tree(struct inode *inode, struct page *ipage);
void f2fs_drop_extent_tree(struct inode *inode);
@@ -3883,6 +3927,10 @@ void f2fs_decompress_end_io(struct page **rpages,
int f2fs_init_compress_ctx(struct compress_ctx *cc);
void f2fs_destroy_compress_ctx(struct compress_ctx *cc);
void f2fs_init_compress_info(struct f2fs_sb_info *sbi);
+int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi);
+void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi);
+int __init f2fs_init_compress_cache(void);
+void f2fs_destroy_compress_cache(void);
#else
static inline bool f2fs_is_compressed_page(struct page *page) { return false; }
static inline bool f2fs_is_compress_backend_ready(struct inode *inode)
@@ -3899,6 +3947,10 @@ static inline struct page *f2fs_compress_control_page(struct page *page)
}
static inline int f2fs_init_compress_mempool(void) { return 0; }
static inline void f2fs_destroy_compress_mempool(void) { }
+static inline int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) { return 0; }
+static inline void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi) { }
+static inline int __init f2fs_init_compress_cache(void) { return 0; }
+static inline void f2fs_destroy_compress_cache(void) { }
#endif
static inline void set_compress_context(struct inode *inode)
@@ -3917,24 +3969,21 @@ static inline void set_compress_context(struct inode *inode)
f2fs_mark_inode_dirty_sync(inode, true);
}
-static inline u64 f2fs_disable_compressed_file(struct inode *inode)
+static inline bool f2fs_disable_compressed_file(struct inode *inode)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
if (!f2fs_compressed_file(inode))
- return 0;
- if (S_ISREG(inode->i_mode)) {
- if (get_dirty_pages(inode))
- return 1;
- if (fi->i_compr_blocks)
- return fi->i_compr_blocks;
- }
+ return true;
+ if (S_ISREG(inode->i_mode) &&
+ (get_dirty_pages(inode) || atomic_read(&fi->i_compr_blocks)))
+ return false;
fi->i_flags &= ~F2FS_COMPR_FL;
stat_dec_compr_inode(inode);
clear_inode_flag(inode, FI_COMPRESSED_FILE);
f2fs_mark_inode_dirty_sync(inode, true);
- return 0;
+ return true;
}
#define F2FS_FEATURE_FUNCS(name, flagname) \
@@ -4028,16 +4077,17 @@ static inline void f2fs_i_compr_blocks_update(struct inode *inode,
u64 blocks, bool add)
{
int diff = F2FS_I(inode)->i_cluster_size - blocks;
+ struct f2fs_inode_info *fi = F2FS_I(inode);
/* don't update i_compr_blocks if saved blocks were released */
- if (!add && !F2FS_I(inode)->i_compr_blocks)
+ if (!add && !atomic_read(&fi->i_compr_blocks))
return;
if (add) {
- F2FS_I(inode)->i_compr_blocks += diff;
+ atomic_add(diff, &fi->i_compr_blocks);
stat_add_compr_blocks(inode, diff);
} else {
- F2FS_I(inode)->i_compr_blocks -= diff;
+ atomic_sub(diff, &fi->i_compr_blocks);
stat_sub_compr_blocks(inode, diff);
}
f2fs_mark_inode_dirty_sync(inode, true);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 8a422400e824..ee861c6d9ff0 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -376,32 +376,15 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
return f2fs_do_sync_file(file, start, end, datasync, false);
}
-static pgoff_t __get_first_dirty_index(struct address_space *mapping,
- pgoff_t pgofs, int whence)
-{
- struct page *page;
- int nr_pages;
-
- if (whence != SEEK_DATA)
- return 0;
-
- /* find first dirty page index */
- nr_pages = find_get_pages_tag(mapping, &pgofs, PAGECACHE_TAG_DIRTY,
- 1, &page);
- if (!nr_pages)
- return ULONG_MAX;
- pgofs = page->index;
- put_page(page);
- return pgofs;
-}
-
-static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr,
- pgoff_t dirty, pgoff_t pgofs, int whence)
+static bool __found_offset(struct address_space *mapping, block_t blkaddr,
+ pgoff_t index, int whence)
{
switch (whence) {
case SEEK_DATA:
- if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
- __is_valid_data_blkaddr(blkaddr))
+ if (__is_valid_data_blkaddr(blkaddr))
+ return true;
+ if (blkaddr == NEW_ADDR &&
+ xa_get_mark(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY))
return true;
break;
case SEEK_HOLE:
@@ -417,7 +400,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
struct inode *inode = file->f_mapping->host;
loff_t maxbytes = inode->i_sb->s_maxbytes;
struct dnode_of_data dn;
- pgoff_t pgofs, end_offset, dirty;
+ pgoff_t pgofs, end_offset;
loff_t data_ofs = offset;
loff_t isize;
int err = 0;
@@ -429,16 +412,13 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
goto fail;
/* handle inline data case */
- if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
- if (whence == SEEK_HOLE)
- data_ofs = isize;
+ if (f2fs_has_inline_data(inode) && whence == SEEK_HOLE) {
+ data_ofs = isize;
goto found;
}
pgofs = (pgoff_t)(offset >> PAGE_SHIFT);
- dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence);
-
for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = f2fs_get_dnode_of_data(&dn, pgofs, LOOKUP_NODE);
@@ -471,7 +451,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
goto fail;
}
- if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty,
+ if (__found_offset(file->f_mapping, blkaddr,
pgofs, whence)) {
f2fs_put_dnode(&dn);
goto found;
@@ -564,7 +544,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
bool compressed_cluster = false;
int cluster_index = 0, valid_blocks = 0;
int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
- bool released = !F2FS_I(dn->inode)->i_compr_blocks;
+ bool released = !atomic_read(&F2FS_I(dn->inode)->i_compr_blocks);
if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
base = get_extra_isize(dn->inode);
@@ -753,11 +733,14 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
return err;
#ifdef CONFIG_F2FS_FS_COMPRESSION
- if (from != free_from)
+ if (from != free_from) {
err = f2fs_truncate_partial_cluster(inode, from, lock);
+ if (err)
+ return err;
+ }
#endif
- return err;
+ return 0;
}
int f2fs_truncate(struct inode *inode)
@@ -1656,13 +1639,14 @@ next_alloc:
}
down_write(&sbi->pin_sem);
- map.m_seg_type = CURSEG_COLD_DATA_PINNED;
f2fs_lock_op(sbi);
- f2fs_allocate_new_segment(sbi, CURSEG_COLD_DATA);
+ f2fs_allocate_new_segment(sbi, CURSEG_COLD_DATA_PINNED);
f2fs_unlock_op(sbi);
+ map.m_seg_type = CURSEG_COLD_DATA_PINNED;
err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
+
up_write(&sbi->pin_sem);
done += map.m_len;
@@ -1828,7 +1812,7 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
if ((iflags ^ masked_flags) & F2FS_COMPR_FL) {
if (masked_flags & F2FS_COMPR_FL) {
- if (f2fs_disable_compressed_file(inode))
+ if (!f2fs_disable_compressed_file(inode))
return -EINVAL;
}
if (iflags & F2FS_NOCOMP_FL)
@@ -1836,6 +1820,8 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
if (iflags & F2FS_COMPR_FL) {
if (!f2fs_may_compress(inode))
return -EINVAL;
+ if (S_ISREG(inode->i_mode) && inode->i_size)
+ return -EINVAL;
set_compress_context(inode);
}
@@ -2783,6 +2769,9 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
if (IS_ENCRYPTED(src) || IS_ENCRYPTED(dst))
return -EOPNOTSUPP;
+ if (pos_out < 0 || pos_in < 0)
+ return -EINVAL;
+
if (src == dst) {
if (pos_in == pos_out)
return 0;
@@ -3258,7 +3247,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
if (ret)
goto out;
- if (f2fs_disable_compressed_file(inode)) {
+ if (!f2fs_disable_compressed_file(inode)) {
ret = -EOPNOTSUPP;
goto out;
}
@@ -3385,7 +3374,7 @@ static int f2fs_ioc_getfslabel(struct file *filp, unsigned long arg)
min(FSLABEL_MAX, count)))
err = -EFAULT;
- kvfree(vbuf);
+ kfree(vbuf);
return err;
}
@@ -3436,7 +3425,7 @@ static int f2fs_get_compress_blocks(struct file *filp, unsigned long arg)
if (!f2fs_compressed_file(inode))
return -EINVAL;
- blocks = F2FS_I(inode)->i_compr_blocks;
+ blocks = atomic_read(&F2FS_I(inode)->i_compr_blocks);
return put_user(blocks, (u64 __user *)arg);
}
@@ -3521,7 +3510,8 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
inode_lock(inode);
writecount = atomic_read(&inode->i_writecount);
- if ((filp->f_mode & FMODE_WRITE && writecount != 1) || writecount) {
+ if ((filp->f_mode & FMODE_WRITE && writecount != 1) ||
+ (!(filp->f_mode & FMODE_WRITE) && writecount)) {
ret = -EBUSY;
goto out;
}
@@ -3540,7 +3530,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
inode->i_ctime = current_time(inode);
f2fs_mark_inode_dirty_sync(inode, true);
- if (!F2FS_I(inode)->i_compr_blocks)
+ if (!atomic_read(&F2FS_I(inode)->i_compr_blocks))
goto out;
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
@@ -3588,14 +3578,15 @@ out:
if (ret >= 0) {
ret = put_user(released_blocks, (u64 __user *)arg);
- } else if (released_blocks && F2FS_I(inode)->i_compr_blocks) {
+ } else if (released_blocks &&
+ atomic_read(&F2FS_I(inode)->i_compr_blocks)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx "
- "iblocks=%llu, released=%u, compr_blocks=%llu, "
+ "iblocks=%llu, released=%u, compr_blocks=%u, "
"run fsck to fix.",
__func__, inode->i_ino, inode->i_blocks,
released_blocks,
- F2FS_I(inode)->i_compr_blocks);
+ atomic_read(&F2FS_I(inode)->i_compr_blocks));
}
return ret;
@@ -3683,7 +3674,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
if (ret)
return ret;
- if (F2FS_I(inode)->i_compr_blocks)
+ if (atomic_read(&F2FS_I(inode)->i_compr_blocks))
goto out;
f2fs_balance_fs(F2FS_I_SB(inode), true);
@@ -3747,14 +3738,15 @@ out:
if (ret >= 0) {
ret = put_user(reserved_blocks, (u64 __user *)arg);
- } else if (reserved_blocks && F2FS_I(inode)->i_compr_blocks) {
+ } else if (reserved_blocks &&
+ atomic_read(&F2FS_I(inode)->i_compr_blocks)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx "
- "iblocks=%llu, reserved=%u, compr_blocks=%llu, "
+ "iblocks=%llu, reserved=%u, compr_blocks=%u, "
"run fsck to fix.",
__func__, inode->i_ino, inode->i_blocks,
reserved_blocks,
- F2FS_I(inode)->i_compr_blocks);
+ atomic_read(&F2FS_I(inode)->i_compr_blocks));
}
return ret;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 11b4adde9baf..05641a1e36cc 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -21,6 +21,8 @@
#include "gc.h"
#include <trace/events/f2fs.h>
+static struct kmem_cache *victim_entry_slab;
+
static unsigned int count_bits(const unsigned long *addr,
unsigned int offset, unsigned int len);
@@ -150,7 +152,7 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(gc_th->f2fs_gc_task)) {
err = PTR_ERR(gc_th->f2fs_gc_task);
- kvfree(gc_th);
+ kfree(gc_th);
sbi->gc_thread = NULL;
}
out:
@@ -163,13 +165,22 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
if (!gc_th)
return;
kthread_stop(gc_th->f2fs_gc_task);
- kvfree(gc_th);
+ kfree(gc_th);
sbi->gc_thread = NULL;
}
static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type)
{
- int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
+ int gc_mode;
+
+ if (gc_type == BG_GC) {
+ if (sbi->am.atgc_enabled)
+ gc_mode = GC_AT;
+ else
+ gc_mode = GC_CB;
+ } else {
+ gc_mode = GC_GREEDY;
+ }
switch (sbi->gc_mode) {
case GC_IDLE_CB:
@@ -179,7 +190,11 @@ static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type)
case GC_URGENT_HIGH:
gc_mode = GC_GREEDY;
break;
+ case GC_IDLE_AT:
+ gc_mode = GC_AT;
+ break;
}
+
return gc_mode;
}
@@ -193,6 +208,11 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
p->dirty_bitmap = dirty_i->dirty_segmap[type];
p->max_search = dirty_i->nr_dirty[type];
p->ofs_unit = 1;
+ } else if (p->alloc_mode == AT_SSR) {
+ p->gc_mode = GC_GREEDY;
+ p->dirty_bitmap = dirty_i->dirty_segmap[type];
+ p->max_search = dirty_i->nr_dirty[type];
+ p->ofs_unit = 1;
} else {
p->gc_mode = select_gc_type(sbi, gc_type);
p->ofs_unit = sbi->segs_per_sec;
@@ -212,6 +232,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
*/
if (gc_type != FG_GC &&
(sbi->gc_mode != GC_URGENT_HIGH) &&
+ (p->gc_mode != GC_AT && p->alloc_mode != AT_SSR) &&
p->max_search > sbi->max_victim_search)
p->max_search = sbi->max_victim_search;
@@ -229,10 +250,16 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
/* SSR allocates in a segment unit */
if (p->alloc_mode == SSR)
return sbi->blocks_per_seg;
+ else if (p->alloc_mode == AT_SSR)
+ return UINT_MAX;
+
+ /* LFS */
if (p->gc_mode == GC_GREEDY)
return 2 * sbi->blocks_per_seg * p->ofs_unit;
else if (p->gc_mode == GC_CB)
return UINT_MAX;
+ else if (p->gc_mode == GC_AT)
+ return UINT_MAX;
else /* No other gc_mode */
return 0;
}
@@ -266,13 +293,14 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
unsigned char age = 0;
unsigned char u;
unsigned int i;
+ unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi, segno);
- for (i = 0; i < sbi->segs_per_sec; i++)
+ for (i = 0; i < usable_segs_per_sec; i++)
mtime += get_seg_entry(sbi, start + i)->mtime;
vblocks = get_valid_blocks(sbi, segno, true);
- mtime = div_u64(mtime, sbi->segs_per_sec);
- vblocks = div_u64(vblocks, sbi->segs_per_sec);
+ mtime = div_u64(mtime, usable_segs_per_sec);
+ vblocks = div_u64(vblocks, usable_segs_per_sec);
u = (vblocks * 100) >> sbi->log_blocks_per_seg;
@@ -297,8 +325,11 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
/* alloc_mode == LFS */
if (p->gc_mode == GC_GREEDY)
return get_valid_blocks(sbi, segno, true);
- else
+ else if (p->gc_mode == GC_CB)
return get_cb_cost(sbi, segno);
+
+ f2fs_bug_on(sbi, 1);
+ return 0;
}
static unsigned int count_bits(const unsigned long *addr,
@@ -313,6 +344,273 @@ static unsigned int count_bits(const unsigned long *addr,
return sum;
}
+static struct victim_entry *attach_victim_entry(struct f2fs_sb_info *sbi,
+ unsigned long long mtime, unsigned int segno,
+ struct rb_node *parent, struct rb_node **p,
+ bool left_most)
+{
+ struct atgc_management *am = &sbi->am;
+ struct victim_entry *ve;
+
+ ve = f2fs_kmem_cache_alloc(victim_entry_slab, GFP_NOFS);
+
+ ve->mtime = mtime;
+ ve->segno = segno;
+
+ rb_link_node(&ve->rb_node, parent, p);
+ rb_insert_color_cached(&ve->rb_node, &am->root, left_most);
+
+ list_add_tail(&ve->list, &am->victim_list);
+
+ am->victim_count++;
+
+ return ve;
+}
+
+static void insert_victim_entry(struct f2fs_sb_info *sbi,
+ unsigned long long mtime, unsigned int segno)
+{
+ struct atgc_management *am = &sbi->am;
+ struct rb_node **p;
+ struct rb_node *parent = NULL;
+ bool left_most = true;
+
+ p = f2fs_lookup_rb_tree_ext(sbi, &am->root, &parent, mtime, &left_most);
+ attach_victim_entry(sbi, mtime, segno, parent, p, left_most);
+}
+
+static void add_victim_entry(struct f2fs_sb_info *sbi,
+ struct victim_sel_policy *p, unsigned int segno)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
+ unsigned int start = GET_SEG_FROM_SEC(sbi, secno);
+ unsigned long long mtime = 0;
+ unsigned int i;
+
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+ if (p->gc_mode == GC_AT &&
+ get_valid_blocks(sbi, segno, true) == 0)
+ return;
+
+ if (p->alloc_mode == AT_SSR &&
+ get_seg_entry(sbi, segno)->ckpt_valid_blocks == 0)
+ return;
+ }
+
+ for (i = 0; i < sbi->segs_per_sec; i++)
+ mtime += get_seg_entry(sbi, start + i)->mtime;
+ mtime = div_u64(mtime, sbi->segs_per_sec);
+
+ /* Handle if the system time has changed by the user */
+ if (mtime < sit_i->min_mtime)
+ sit_i->min_mtime = mtime;
+ if (mtime > sit_i->max_mtime)
+ sit_i->max_mtime = mtime;
+ if (mtime < sit_i->dirty_min_mtime)
+ sit_i->dirty_min_mtime = mtime;
+ if (mtime > sit_i->dirty_max_mtime)
+ sit_i->dirty_max_mtime = mtime;
+
+ /* don't choose young section as candidate */
+ if (sit_i->dirty_max_mtime - mtime < p->age_threshold)
+ return;
+
+ insert_victim_entry(sbi, mtime, segno);
+}
+
+static struct rb_node *lookup_central_victim(struct f2fs_sb_info *sbi,
+ struct victim_sel_policy *p)
+{
+ struct atgc_management *am = &sbi->am;
+ struct rb_node *parent = NULL;
+ bool left_most;
+
+ f2fs_lookup_rb_tree_ext(sbi, &am->root, &parent, p->age, &left_most);
+
+ return parent;
+}
+
+static void atgc_lookup_victim(struct f2fs_sb_info *sbi,
+ struct victim_sel_policy *p)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ struct atgc_management *am = &sbi->am;
+ struct rb_root_cached *root = &am->root;
+ struct rb_node *node;
+ struct rb_entry *re;
+ struct victim_entry *ve;
+ unsigned long long total_time;
+ unsigned long long age, u, accu;
+ unsigned long long max_mtime = sit_i->dirty_max_mtime;
+ unsigned long long min_mtime = sit_i->dirty_min_mtime;
+ unsigned int sec_blocks = BLKS_PER_SEC(sbi);
+ unsigned int vblocks;
+ unsigned int dirty_threshold = max(am->max_candidate_count,
+ am->candidate_ratio *
+ am->victim_count / 100);
+ unsigned int age_weight = am->age_weight;
+ unsigned int cost;
+ unsigned int iter = 0;
+
+ if (max_mtime < min_mtime)
+ return;
+
+ max_mtime += 1;
+ total_time = max_mtime - min_mtime;
+
+ accu = div64_u64(ULLONG_MAX, total_time);
+ accu = min_t(unsigned long long, div_u64(accu, 100),
+ DEFAULT_ACCURACY_CLASS);
+
+ node = rb_first_cached(root);
+next:
+ re = rb_entry_safe(node, struct rb_entry, rb_node);
+ if (!re)
+ return;
+
+ ve = (struct victim_entry *)re;
+
+ if (ve->mtime >= max_mtime || ve->mtime < min_mtime)
+ goto skip;
+
+ /* age = 10000 * x% * 60 */
+ age = div64_u64(accu * (max_mtime - ve->mtime), total_time) *
+ age_weight;
+
+ vblocks = get_valid_blocks(sbi, ve->segno, true);
+ f2fs_bug_on(sbi, !vblocks || vblocks == sec_blocks);
+
+ /* u = 10000 * x% * 40 */
+ u = div64_u64(accu * (sec_blocks - vblocks), sec_blocks) *
+ (100 - age_weight);
+
+ f2fs_bug_on(sbi, age + u >= UINT_MAX);
+
+ cost = UINT_MAX - (age + u);
+ iter++;
+
+ if (cost < p->min_cost ||
+ (cost == p->min_cost && age > p->oldest_age)) {
+ p->min_cost = cost;
+ p->oldest_age = age;
+ p->min_segno = ve->segno;
+ }
+skip:
+ if (iter < dirty_threshold) {
+ node = rb_next(node);
+ goto next;
+ }
+}
+
+/*
+ * select candidates around source section in range of
+ * [target - dirty_threshold, target + dirty_threshold]
+ */
+static void atssr_lookup_victim(struct f2fs_sb_info *sbi,
+ struct victim_sel_policy *p)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ struct atgc_management *am = &sbi->am;
+ struct rb_node *node;
+ struct rb_entry *re;
+ struct victim_entry *ve;
+ unsigned long long age;
+ unsigned long long max_mtime = sit_i->dirty_max_mtime;
+ unsigned long long min_mtime = sit_i->dirty_min_mtime;
+ unsigned int seg_blocks = sbi->blocks_per_seg;
+ unsigned int vblocks;
+ unsigned int dirty_threshold = max(am->max_candidate_count,
+ am->candidate_ratio *
+ am->victim_count / 100);
+ unsigned int cost;
+ unsigned int iter = 0;
+ int stage = 0;
+
+ if (max_mtime < min_mtime)
+ return;
+ max_mtime += 1;
+next_stage:
+ node = lookup_central_victim(sbi, p);
+next_node:
+ re = rb_entry_safe(node, struct rb_entry, rb_node);
+ if (!re) {
+ if (stage == 0)
+ goto skip_stage;
+ return;
+ }
+
+ ve = (struct victim_entry *)re;
+
+ if (ve->mtime >= max_mtime || ve->mtime < min_mtime)
+ goto skip_node;
+
+ age = max_mtime - ve->mtime;
+
+ vblocks = get_seg_entry(sbi, ve->segno)->ckpt_valid_blocks;
+ f2fs_bug_on(sbi, !vblocks);
+
+ /* rare case */
+ if (vblocks == seg_blocks)
+ goto skip_node;
+
+ iter++;
+
+ age = max_mtime - abs(p->age - age);
+ cost = UINT_MAX - vblocks;
+
+ if (cost < p->min_cost ||
+ (cost == p->min_cost && age > p->oldest_age)) {
+ p->min_cost = cost;
+ p->oldest_age = age;
+ p->min_segno = ve->segno;
+ }
+skip_node:
+ if (iter < dirty_threshold) {
+ if (stage == 0)
+ node = rb_prev(node);
+ else if (stage == 1)
+ node = rb_next(node);
+ goto next_node;
+ }
+skip_stage:
+ if (stage < 1) {
+ stage++;
+ iter = 0;
+ goto next_stage;
+ }
+}
+static void lookup_victim_by_age(struct f2fs_sb_info *sbi,
+ struct victim_sel_policy *p)
+{
+ f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
+ &sbi->am.root, true));
+
+ if (p->gc_mode == GC_AT)
+ atgc_lookup_victim(sbi, p);
+ else if (p->alloc_mode == AT_SSR)
+ atssr_lookup_victim(sbi, p);
+ else
+ f2fs_bug_on(sbi, 1);
+}
+
+static void release_victim_entry(struct f2fs_sb_info *sbi)
+{
+ struct atgc_management *am = &sbi->am;
+ struct victim_entry *ve, *tmp;
+
+ list_for_each_entry_safe(ve, tmp, &am->victim_list, list) {
+ list_del(&ve->list);
+ kmem_cache_free(victim_entry_slab, ve);
+ am->victim_count--;
+ }
+
+ am->root = RB_ROOT_CACHED;
+
+ f2fs_bug_on(sbi, am->victim_count);
+ f2fs_bug_on(sbi, !list_empty(&am->victim_list));
+}
+
/*
* This function is called from two paths.
* One is garbage collection and the other is SSR segment selection.
@@ -322,25 +620,37 @@ static unsigned int count_bits(const unsigned long *addr,
* which has minimum valid blocks and removes it from dirty seglist.
*/
static int get_victim_by_default(struct f2fs_sb_info *sbi,
- unsigned int *result, int gc_type, int type, char alloc_mode)
+ unsigned int *result, int gc_type, int type,
+ char alloc_mode, unsigned long long age)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct sit_info *sm = SIT_I(sbi);
struct victim_sel_policy p;
unsigned int secno, last_victim;
unsigned int last_segment;
- unsigned int nsearched = 0;
+ unsigned int nsearched;
+ bool is_atgc;
int ret = 0;
mutex_lock(&dirty_i->seglist_lock);
last_segment = MAIN_SECS(sbi) * sbi->segs_per_sec;
p.alloc_mode = alloc_mode;
- select_policy(sbi, gc_type, type, &p);
+ p.age = age;
+ p.age_threshold = sbi->am.age_threshold;
+retry:
+ select_policy(sbi, gc_type, type, &p);
p.min_segno = NULL_SEGNO;
+ p.oldest_age = 0;
p.min_cost = get_max_cost(sbi, &p);
+ is_atgc = (p.gc_mode == GC_AT || p.alloc_mode == AT_SSR);
+ nsearched = 0;
+
+ if (is_atgc)
+ SIT_I(sbi)->dirty_min_mtime = ULLONG_MAX;
+
if (*result != NULL_SEGNO) {
if (!get_valid_blocks(sbi, *result, false)) {
ret = -ENODATA;
@@ -421,11 +731,16 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
/* Don't touch checkpointed data */
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
get_ckpt_valid_blocks(sbi, segno) &&
- p.alloc_mode != SSR))
+ p.alloc_mode == LFS))
goto next;
if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
goto next;
+ if (is_atgc) {
+ add_victim_entry(sbi, &p, segno);
+ goto next;
+ }
+
cost = get_gc_cost(sbi, segno, &p);
if (p.min_cost > cost) {
@@ -444,6 +759,19 @@ next:
break;
}
}
+
+ /* get victim for GC_AT/AT_SSR */
+ if (is_atgc) {
+ lookup_victim_by_age(sbi, &p);
+ release_victim_entry(sbi);
+ }
+
+ if (is_atgc && p.min_segno == NULL_SEGNO &&
+ sm->elapsed_time < p.age_threshold) {
+ p.age_threshold = 0;
+ goto retry;
+ }
+
if (p.min_segno != NULL_SEGNO) {
got_it:
*result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
@@ -536,6 +864,7 @@ static int gc_node_segment(struct f2fs_sb_info *sbi,
int phase = 0;
bool fggc = (gc_type == FG_GC);
int submitted = 0;
+ unsigned int usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
start_addr = START_BLOCK(sbi, segno);
@@ -545,7 +874,7 @@ next_step:
if (fggc && phase == 2)
atomic_inc(&sbi->wb_sync_req[NODE]);
- for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
+ for (off = 0; off < usable_blks_in_seg; off++, entry++) {
nid_t nid = le32_to_cpu(entry->nid);
struct page *node_page;
struct node_info ni;
@@ -791,6 +1120,8 @@ static int move_data_block(struct inode *inode, block_t bidx,
block_t newaddr;
int err = 0;
bool lfs_mode = f2fs_lfs_mode(fio.sbi);
+ int type = fio.sbi->am.atgc_enabled ?
+ CURSEG_ALL_DATA_ATGC : CURSEG_COLD_DATA;
/* do not read out */
page = f2fs_grab_cache_page(inode->i_mapping, bidx, false);
@@ -877,7 +1208,7 @@ static int move_data_block(struct inode *inode, block_t bidx,
}
f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
- &sum, CURSEG_COLD_DATA, NULL);
+ &sum, type, NULL);
fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS);
@@ -927,7 +1258,7 @@ put_page_out:
recover_block:
if (err)
f2fs_do_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr,
- true, true);
+ true, true, true);
up_out:
if (lfs_mode)
up_write(&fio.sbi->io_order_lock);
@@ -1033,13 +1364,14 @@ static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
int off;
int phase = 0;
int submitted = 0;
+ unsigned int usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
start_addr = START_BLOCK(sbi, segno);
next_step:
entry = sum;
- for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
+ for (off = 0; off < usable_blks_in_seg; off++, entry++) {
struct page *data_page;
struct inode *inode;
struct node_info dni; /* dnode info for the data */
@@ -1182,7 +1514,7 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
down_write(&sit_i->sentry_lock);
ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type,
- NO_CHECK_TYPE, LFS);
+ NO_CHECK_TYPE, LFS, 0);
up_write(&sit_i->sentry_lock);
return ret;
}
@@ -1204,6 +1536,17 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
if (__is_large_section(sbi))
end_segno = rounddown(end_segno, sbi->segs_per_sec);
+ /*
+ * zone-capacity can be less than zone-size in zoned devices,
+ * resulting in less than expected usable segments in the zone,
+ * calculate the end segno in the zone which can be garbage collected
+ */
+ if (f2fs_sb_has_blkzoned(sbi))
+ end_segno -= sbi->segs_per_sec -
+ f2fs_usable_segs_in_sec(sbi, segno);
+
+ sanity_check_seg_type(sbi, get_seg_entry(sbi, segno)->type);
+
/* readahead multi ssa blocks those have contiguous address */
if (__is_large_section(sbi))
f2fs_ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno),
@@ -1356,7 +1699,8 @@ gc_more:
goto stop;
seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);
- if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec)
+ if (gc_type == FG_GC &&
+ seg_freed == f2fs_usable_segs_in_sec(sbi, segno))
sec_freed++;
total_freed += seg_freed;
@@ -1413,6 +1757,37 @@ stop:
return ret;
}
+int __init f2fs_create_garbage_collection_cache(void)
+{
+ victim_entry_slab = f2fs_kmem_cache_create("f2fs_victim_entry",
+ sizeof(struct victim_entry));
+ if (!victim_entry_slab)
+ return -ENOMEM;
+ return 0;
+}
+
+void f2fs_destroy_garbage_collection_cache(void)
+{
+ kmem_cache_destroy(victim_entry_slab);
+}
+
+static void init_atgc_management(struct f2fs_sb_info *sbi)
+{
+ struct atgc_management *am = &sbi->am;
+
+ if (test_opt(sbi, ATGC) &&
+ SIT_I(sbi)->elapsed_time >= DEF_GC_THREAD_AGE_THRESHOLD)
+ am->atgc_enabled = true;
+
+ am->root = RB_ROOT_CACHED;
+ INIT_LIST_HEAD(&am->victim_list);
+ am->victim_count = 0;
+
+ am->candidate_ratio = DEF_GC_THREAD_CANDIDATE_RATIO;
+ am->max_candidate_count = DEF_GC_THREAD_MAX_CANDIDATE_COUNT;
+ am->age_weight = DEF_GC_THREAD_AGE_WEIGHT;
+}
+
void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
{
DIRTY_I(sbi)->v_ops = &default_v_ops;
@@ -1423,6 +1798,8 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
if (f2fs_is_multi_device(sbi) && !__is_large_section(sbi))
SIT_I(sbi)->last_victim[ALLOC_NEXT] =
GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
+
+ init_atgc_management(sbi);
}
static int free_segment_range(struct f2fs_sb_info *sbi,
@@ -1450,7 +1827,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
/* Move out cursegs from the target range */
- for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
+ for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++)
f2fs_allocate_segment_for_resize(sbi, type, start, end);
/* do GC to move out valid blocks in the range */
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index db3c61046aa4..0c8dae12dc51 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -14,6 +14,14 @@
#define DEF_GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */
#define DEF_GC_THREAD_MAX_SLEEP_TIME 60000
#define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */
+
+/* choose candidates from sections which has age of more than 7 days */
+#define DEF_GC_THREAD_AGE_THRESHOLD (60 * 60 * 24 * 7)
+#define DEF_GC_THREAD_CANDIDATE_RATIO 20 /* select 20% oldest sections as candidates */
+#define DEF_GC_THREAD_MAX_CANDIDATE_COUNT 10 /* select at most 10 sections as candidates */
+#define DEF_GC_THREAD_AGE_WEIGHT 60 /* age weight */
+#define DEFAULT_ACCURACY_CLASS 10000 /* accuracy class */
+
#define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */
#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */
@@ -41,16 +49,69 @@ struct gc_inode_list {
struct radix_tree_root iroot;
};
+struct victim_info {
+ unsigned long long mtime; /* mtime of section */
+ unsigned int segno; /* section No. */
+};
+
+struct victim_entry {
+ struct rb_node rb_node; /* rb node located in rb-tree */
+ union {
+ struct {
+ unsigned long long mtime; /* mtime of section */
+ unsigned int segno; /* segment No. */
+ };
+ struct victim_info vi; /* victim info */
+ };
+ struct list_head list;
+};
+
/*
* inline functions
*/
+
+/*
+ * On a Zoned device zone-capacity can be less than zone-size and if
+ * zone-capacity is not aligned to f2fs segment size(2MB), then the segment
+ * starting just before zone-capacity has some blocks spanning across the
+ * zone-capacity, these blocks are not usable.
+ * Such spanning segments can be in free list so calculate the sum of usable
+ * blocks in currently free segments including normal and spanning segments.
+ */
+static inline block_t free_segs_blk_count_zoned(struct f2fs_sb_info *sbi)
+{
+ block_t free_seg_blks = 0;
+ struct free_segmap_info *free_i = FREE_I(sbi);
+ int j;
+
+ spin_lock(&free_i->segmap_lock);
+ for (j = 0; j < MAIN_SEGS(sbi); j++)
+ if (!test_bit(j, free_i->free_segmap))
+ free_seg_blks += f2fs_usable_blks_in_seg(sbi, j);
+ spin_unlock(&free_i->segmap_lock);
+
+ return free_seg_blks;
+}
+
+static inline block_t free_segs_blk_count(struct f2fs_sb_info *sbi)
+{
+ if (f2fs_sb_has_blkzoned(sbi))
+ return free_segs_blk_count_zoned(sbi);
+
+ return free_segments(sbi) << sbi->log_blocks_per_seg;
+}
+
static inline block_t free_user_blocks(struct f2fs_sb_info *sbi)
{
- if (free_segments(sbi) < overprovision_segments(sbi))
+ block_t free_blks, ovp_blks;
+
+ free_blks = free_segs_blk_count(sbi);
+ ovp_blks = overprovision_segments(sbi) << sbi->log_blocks_per_seg;
+
+ if (free_blks < ovp_blks)
return 0;
- else
- return (free_segments(sbi) - overprovision_segments(sbi))
- << sbi->log_blocks_per_seg;
+
+ return free_blks - ovp_blks;
}
static inline block_t limit_invalid_user_blocks(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 102df444f623..70384e31788d 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -524,7 +524,7 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage,
!f2fs_has_inline_xattr(dir))
F2FS_I(dir)->i_inline_xattr_size = 0;
- kvfree(backup_dentry);
+ kfree(backup_dentry);
return 0;
recover:
lock_page(ipage);
@@ -535,7 +535,7 @@ recover:
set_page_dirty(ipage);
f2fs_put_page(ipage, 1);
- kvfree(backup_dentry);
+ kfree(backup_dentry);
return err;
}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 66969ae852b9..657db2fb6739 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -287,11 +287,19 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
return false;
}
+ if ((fi->i_flags & F2FS_CASEFOLD_FL) && !f2fs_sb_has_casefold(sbi)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) has casefold flag, but casefold feature is off",
+ __func__, inode->i_ino);
+ return false;
+ }
+
if (f2fs_has_extra_attr(inode) && f2fs_sb_has_compression(sbi) &&
fi->i_flags & F2FS_COMPR_FL &&
F2FS_FITS_IN_INODE(ri, fi->i_extra_isize,
i_log_cluster_size)) {
if (ri->i_compress_algorithm >= COMPRESS_MAX) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported "
"compress algorithm: %u, run fsck to fix",
__func__, inode->i_ino,
@@ -300,6 +308,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
}
if (le64_to_cpu(ri->i_compr_blocks) >
SECTOR_TO_BLOCK(inode->i_blocks)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx) has inconsistent "
"i_compr_blocks:%llu, i_blocks:%llu, run fsck to fix",
__func__, inode->i_ino,
@@ -309,6 +318,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
}
if (ri->i_log_cluster_size < MIN_COMPRESS_LOG_SIZE ||
ri->i_log_cluster_size > MAX_COMPRESS_LOG_SIZE) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported "
"log cluster size: %u, run fsck to fix",
__func__, inode->i_ino,
@@ -442,7 +452,8 @@ static int do_read_inode(struct inode *inode)
(fi->i_flags & F2FS_COMPR_FL)) {
if (F2FS_FITS_IN_INODE(ri, fi->i_extra_isize,
i_log_cluster_size)) {
- fi->i_compr_blocks = le64_to_cpu(ri->i_compr_blocks);
+ atomic_set(&fi->i_compr_blocks,
+ le64_to_cpu(ri->i_compr_blocks));
fi->i_compress_algorithm = ri->i_compress_algorithm;
fi->i_log_cluster_size = ri->i_log_cluster_size;
fi->i_cluster_size = 1 << fi->i_log_cluster_size;
@@ -460,7 +471,7 @@ static int do_read_inode(struct inode *inode)
stat_inc_inline_inode(inode);
stat_inc_inline_dir(inode);
stat_inc_compr_inode(inode);
- stat_add_compr_blocks(inode, F2FS_I(inode)->i_compr_blocks);
+ stat_add_compr_blocks(inode, atomic_read(&fi->i_compr_blocks));
return 0;
}
@@ -619,7 +630,8 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
i_log_cluster_size)) {
ri->i_compr_blocks =
- cpu_to_le64(F2FS_I(inode)->i_compr_blocks);
+ cpu_to_le64(atomic_read(
+ &F2FS_I(inode)->i_compr_blocks));
ri->i_compress_algorithm =
F2FS_I(inode)->i_compress_algorithm;
ri->i_log_cluster_size =
@@ -768,7 +780,8 @@ no_delete:
stat_dec_inline_dir(inode);
stat_dec_inline_inode(inode);
stat_dec_compr_inode(inode);
- stat_sub_compr_blocks(inode, F2FS_I(inode)->i_compr_blocks);
+ stat_sub_compr_blocks(inode,
+ atomic_read(&F2FS_I(inode)->i_compr_blocks));
if (likely(!f2fs_cp_error(sbi) &&
!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 45f324511a19..8fa37d1434de 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -712,7 +712,7 @@ out_f2fs_handle_failed_inode:
f2fs_handle_failed_inode(inode);
out_free_encrypted_link:
if (disk_link.name != (unsigned char *)symname)
- kvfree(disk_link.name);
+ kfree(disk_link.name);
return err;
}
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index cb1b5b61a1da..d5d8ce077f29 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -109,7 +109,7 @@ static void clear_node_page_dirty(struct page *page)
static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
- return f2fs_get_meta_page_nofail(sbi, current_nat_addr(sbi, nid));
+ return f2fs_get_meta_page(sbi, current_nat_addr(sbi, nid));
}
static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
@@ -3105,9 +3105,6 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
- if (!version_bitmap)
- return -EFAULT;
-
nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size,
GFP_KERNEL);
if (!nm_i->nat_bitmap)
@@ -3257,7 +3254,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
kvfree(nm_i->nat_bitmap_mir);
#endif
sbi->nm_info = NULL;
- kvfree(nm_i);
+ kfree(nm_i);
}
int __init f2fs_create_node_manager_caches(void)
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index a1dff8c6d1c8..d8c9a58941bc 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -189,7 +189,7 @@ void f2fs_register_inmem_page(struct inode *inode, struct page *page)
f2fs_trace_pid(page);
- f2fs_set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
+ f2fs_set_page_private(page, ATOMIC_WRITTEN_PAGE);
new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
@@ -728,7 +728,7 @@ init_thread:
"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(fcc->f2fs_issue_flush)) {
err = PTR_ERR(fcc->f2fs_issue_flush);
- kvfree(fcc);
+ kfree(fcc);
SM_I(sbi)->fcc_info = NULL;
return err;
}
@@ -747,7 +747,7 @@ void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
kthread_stop(flush_thread);
}
if (free) {
- kvfree(fcc);
+ kfree(fcc);
SM_I(sbi)->fcc_info = NULL;
}
}
@@ -759,6 +759,9 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
if (!f2fs_is_multi_device(sbi))
return 0;
+ if (test_opt(sbi, NOBARRIER))
+ return 0;
+
for (i = 1; i < sbi->s_ndevs; i++) {
if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
continue;
@@ -859,20 +862,22 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
unsigned short valid_blocks, ckpt_valid_blocks;
+ unsigned int usable_blocks;
if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
return;
+ usable_blocks = f2fs_usable_blks_in_seg(sbi, segno);
mutex_lock(&dirty_i->seglist_lock);
valid_blocks = get_valid_blocks(sbi, segno, false);
ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
- ckpt_valid_blocks == sbi->blocks_per_seg)) {
+ ckpt_valid_blocks == usable_blocks)) {
__locate_dirty_segment(sbi, segno, PRE);
__remove_dirty_segment(sbi, segno, DIRTY);
- } else if (valid_blocks < sbi->blocks_per_seg) {
+ } else if (valid_blocks < usable_blocks) {
__locate_dirty_segment(sbi, segno, DIRTY);
} else {
/* Recovery routine with SSR needs this */
@@ -915,9 +920,11 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
se = get_seg_entry(sbi, segno);
if (IS_NODESEG(se->type))
- holes[NODE] += sbi->blocks_per_seg - se->valid_blocks;
+ holes[NODE] += f2fs_usable_blks_in_seg(sbi, segno) -
+ se->valid_blocks;
else
- holes[DATA] += sbi->blocks_per_seg - se->valid_blocks;
+ holes[DATA] += f2fs_usable_blks_in_seg(sbi, segno) -
+ se->valid_blocks;
}
mutex_unlock(&dirty_i->seglist_lock);
@@ -1521,7 +1528,7 @@ retry:
goto next;
if (unlikely(dcc->rbtree_check))
f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
- &dcc->root));
+ &dcc->root, false));
blk_start_plug(&plug);
list_for_each_entry_safe(dc, tmp, pend_list, list) {
f2fs_bug_on(sbi, dc->state != D_PREP);
@@ -1958,7 +1965,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
mutex_lock(&dirty_i->seglist_lock);
for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
- __set_test_and_free(sbi, segno);
+ __set_test_and_free(sbi, segno, false);
mutex_unlock(&dirty_i->seglist_lock);
}
@@ -2101,7 +2108,7 @@ init_thread:
"f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(dcc->f2fs_issue_discard)) {
err = PTR_ERR(dcc->f2fs_issue_discard);
- kvfree(dcc);
+ kfree(dcc);
SM_I(sbi)->dcc_info = NULL;
return err;
}
@@ -2125,7 +2132,7 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
if (unlikely(atomic_read(&dcc->discard_cmd_cnt)))
f2fs_issue_discard_timeout(sbi);
- kvfree(dcc);
+ kfree(dcc);
SM_I(sbi)->dcc_info = NULL;
}
@@ -2150,6 +2157,39 @@ static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
__mark_sit_entry_dirty(sbi, segno);
}
+static inline unsigned long long get_segment_mtime(struct f2fs_sb_info *sbi,
+ block_t blkaddr)
+{
+ unsigned int segno = GET_SEGNO(sbi, blkaddr);
+
+ if (segno == NULL_SEGNO)
+ return 0;
+ return get_seg_entry(sbi, segno)->mtime;
+}
+
+static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr,
+ unsigned long long old_mtime)
+{
+ struct seg_entry *se;
+ unsigned int segno = GET_SEGNO(sbi, blkaddr);
+ unsigned long long ctime = get_mtime(sbi, false);
+ unsigned long long mtime = old_mtime ? old_mtime : ctime;
+
+ if (segno == NULL_SEGNO)
+ return;
+
+ se = get_seg_entry(sbi, segno);
+
+ if (!se->mtime)
+ se->mtime = mtime;
+ else
+ se->mtime = div_u64(se->mtime * se->valid_blocks + mtime,
+ se->valid_blocks + 1);
+
+ if (ctime > SIT_I(sbi)->max_mtime)
+ SIT_I(sbi)->max_mtime = ctime;
+}
+
static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
{
struct seg_entry *se;
@@ -2167,12 +2207,9 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
f2fs_bug_on(sbi, (new_vblocks < 0 ||
- (new_vblocks > sbi->blocks_per_seg)));
+ (new_vblocks > f2fs_usable_blks_in_seg(sbi, segno))));
se->valid_blocks = new_vblocks;
- se->mtime = get_mtime(sbi, false);
- if (se->mtime > SIT_I(sbi)->max_mtime)
- SIT_I(sbi)->max_mtime = se->mtime;
/* Update valid block bitmap */
if (del > 0) {
@@ -2265,6 +2302,7 @@ void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
/* add it into sit main buffer */
down_write(&sit_i->sentry_lock);
+ update_segment_mtime(sbi, addr, 0);
update_sit_entry(sbi, addr, -1);
/* add it into dirty seglist */
@@ -2344,7 +2382,9 @@ int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
*/
struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
{
- return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno));
+ if (unlikely(f2fs_cp_error(sbi)))
+ return ERR_PTR(-EIO);
+ return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno));
}
void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
@@ -2389,9 +2429,9 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi,
f2fs_put_page(page, 1);
}
-static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
+static int is_next_segment_free(struct f2fs_sb_info *sbi,
+ struct curseg_info *curseg, int type)
{
- struct curseg_info *curseg = CURSEG_I(sbi, type);
unsigned int segno = curseg->segno + 1;
struct free_segmap_info *free_i = FREE_I(sbi);
@@ -2495,7 +2535,9 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
struct summary_footer *sum_footer;
+ unsigned short seg_type = curseg->seg_type;
+ curseg->inited = true;
curseg->segno = curseg->next_segno;
curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
curseg->next_blkoff = 0;
@@ -2503,24 +2545,36 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
sum_footer = &(curseg->sum_blk->footer);
memset(sum_footer, 0, sizeof(struct summary_footer));
- if (IS_DATASEG(type))
+
+ sanity_check_seg_type(sbi, seg_type);
+
+ if (IS_DATASEG(seg_type))
SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
- if (IS_NODESEG(type))
+ if (IS_NODESEG(seg_type))
SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
- __set_sit_entry_type(sbi, type, curseg->segno, modified);
+ __set_sit_entry_type(sbi, seg_type, curseg->segno, modified);
}
static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+ unsigned short seg_type = curseg->seg_type;
+
+ sanity_check_seg_type(sbi, seg_type);
+
/* if segs_per_sec is large than 1, we need to keep original policy. */
if (__is_large_section(sbi))
- return CURSEG_I(sbi, type)->segno;
+ return curseg->segno;
+
+ /* inmem log may not locate on any segment after mount */
+ if (!curseg->inited)
+ return 0;
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
return 0;
if (test_opt(sbi, NOHEAP) &&
- (type == CURSEG_HOT_DATA || IS_NODESEG(type)))
+ (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type)))
return 0;
if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
@@ -2530,7 +2584,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
return 0;
- return CURSEG_I(sbi, type)->segno;
+ return curseg->segno;
}
/*
@@ -2540,12 +2594,14 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
+ unsigned short seg_type = curseg->seg_type;
unsigned int segno = curseg->segno;
int dir = ALLOC_LEFT;
- write_sum_page(sbi, curseg->sum_blk,
+ if (curseg->inited)
+ write_sum_page(sbi, curseg->sum_blk,
GET_SUM_BLOCK(sbi, segno));
- if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
+ if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
dir = ALLOC_RIGHT;
if (test_opt(sbi, NOHEAP))
@@ -2594,7 +2650,7 @@ static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
* This function always allocates a used segment(from dirty seglist) by SSR
* manner, so it should recover the existing segment information of valid blocks
*/
-static void change_curseg(struct f2fs_sb_info *sbi, int type)
+static void change_curseg(struct f2fs_sb_info *sbi, int type, bool flush)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -2602,8 +2658,10 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
struct f2fs_summary_block *sum_node;
struct page *sum_page;
- write_sum_page(sbi, curseg->sum_blk,
- GET_SUM_BLOCK(sbi, curseg->segno));
+ if (flush)
+ write_sum_page(sbi, curseg->sum_blk,
+ GET_SUM_BLOCK(sbi, curseg->segno));
+
__set_test_and_inuse(sbi, new_segno);
mutex_lock(&dirty_i->seglist_lock);
@@ -2616,29 +2674,139 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
__next_free_blkoff(sbi, curseg, 0);
sum_page = f2fs_get_sum_page(sbi, new_segno);
- f2fs_bug_on(sbi, IS_ERR(sum_page));
+ if (IS_ERR(sum_page)) {
+ /* GC won't be able to use stale summary pages by cp_error */
+ memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE);
+ return;
+ }
sum_node = (struct f2fs_summary_block *)page_address(sum_page);
memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
f2fs_put_page(sum_page, 1);
}
-static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
+static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
+ int alloc_mode, unsigned long long age);
+
+static void get_atssr_segment(struct f2fs_sb_info *sbi, int type,
+ int target_type, int alloc_mode,
+ unsigned long long age)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+
+ curseg->seg_type = target_type;
+
+ if (get_ssr_segment(sbi, type, alloc_mode, age)) {
+ struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno);
+
+ curseg->seg_type = se->type;
+ change_curseg(sbi, type, true);
+ } else {
+ /* allocate cold segment by default */
+ curseg->seg_type = CURSEG_COLD_DATA;
+ new_curseg(sbi, type, true);
+ }
+ stat_inc_seg_type(sbi, curseg);
+}
+
+static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC);
+
+ if (!sbi->am.atgc_enabled)
+ return;
+
+ down_read(&SM_I(sbi)->curseg_lock);
+
+ mutex_lock(&curseg->curseg_mutex);
+ down_write(&SIT_I(sbi)->sentry_lock);
+
+ get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, CURSEG_COLD_DATA, SSR, 0);
+
+ up_write(&SIT_I(sbi)->sentry_lock);
+ mutex_unlock(&curseg->curseg_mutex);
+
+ up_read(&SM_I(sbi)->curseg_lock);
+
+}
+void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
+{
+ __f2fs_init_atgc_curseg(sbi);
+}
+
+static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+
+ mutex_lock(&curseg->curseg_mutex);
+ if (!curseg->inited)
+ goto out;
+
+ if (get_valid_blocks(sbi, curseg->segno, false)) {
+ write_sum_page(sbi, curseg->sum_blk,
+ GET_SUM_BLOCK(sbi, curseg->segno));
+ } else {
+ mutex_lock(&DIRTY_I(sbi)->seglist_lock);
+ __set_test_and_free(sbi, curseg->segno, true);
+ mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
+ }
+out:
+ mutex_unlock(&curseg->curseg_mutex);
+}
+
+void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi)
+{
+ __f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
+
+ if (sbi->am.atgc_enabled)
+ __f2fs_save_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
+}
+
+static void __f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+
+ mutex_lock(&curseg->curseg_mutex);
+ if (!curseg->inited)
+ goto out;
+ if (get_valid_blocks(sbi, curseg->segno, false))
+ goto out;
+
+ mutex_lock(&DIRTY_I(sbi)->seglist_lock);
+ __set_test_and_inuse(sbi, curseg->segno);
+ mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
+out:
+ mutex_unlock(&curseg->curseg_mutex);
+}
+
+void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi)
+{
+ __f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
+
+ if (sbi->am.atgc_enabled)
+ __f2fs_restore_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
+}
+
+static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
+ int alloc_mode, unsigned long long age)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
unsigned segno = NULL_SEGNO;
+ unsigned short seg_type = curseg->seg_type;
int i, cnt;
bool reversed = false;
+ sanity_check_seg_type(sbi, seg_type);
+
/* f2fs_need_SSR() already forces to do this */
- if (!v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) {
+ if (!v_ops->get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) {
curseg->next_segno = segno;
return 1;
}
/* For node segments, let's do SSR more intensively */
- if (IS_NODESEG(type)) {
- if (type >= CURSEG_WARM_NODE) {
+ if (IS_NODESEG(seg_type)) {
+ if (seg_type >= CURSEG_WARM_NODE) {
reversed = true;
i = CURSEG_COLD_NODE;
} else {
@@ -2646,7 +2814,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
}
cnt = NR_CURSEG_NODE_TYPE;
} else {
- if (type >= CURSEG_WARM_DATA) {
+ if (seg_type >= CURSEG_WARM_DATA) {
reversed = true;
i = CURSEG_COLD_DATA;
} else {
@@ -2656,9 +2824,9 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
}
for (; cnt-- > 0; reversed ? i-- : i++) {
- if (i == type)
+ if (i == seg_type)
continue;
- if (!v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) {
+ if (!v_ops->get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) {
curseg->next_segno = segno;
return 1;
}
@@ -2687,13 +2855,15 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
if (force)
new_curseg(sbi, type, true);
else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
- type == CURSEG_WARM_NODE)
+ curseg->seg_type == CURSEG_WARM_NODE)
new_curseg(sbi, type, false);
- else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
+ else if (curseg->alloc_type == LFS &&
+ is_next_segment_free(sbi, curseg, type) &&
likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
new_curseg(sbi, type, false);
- else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
- change_curseg(sbi, type);
+ else if (f2fs_need_SSR(sbi) &&
+ get_ssr_segment(sbi, type, SSR, 0))
+ change_curseg(sbi, type, true);
else
new_curseg(sbi, type, false);
@@ -2714,8 +2884,8 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
if (segno < start || segno > end)
goto unlock;
- if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
- change_curseg(sbi, type);
+ if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0))
+ change_curseg(sbi, type, true);
else
new_curseg(sbi, type, true);
@@ -2738,11 +2908,15 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type)
struct curseg_info *curseg = CURSEG_I(sbi, type);
unsigned int old_segno;
+ if (!curseg->inited)
+ goto alloc;
+
if (!curseg->next_blkoff &&
!get_valid_blocks(sbi, curseg->segno, false) &&
!get_ckpt_valid_blocks(sbi, curseg->segno))
return;
+alloc:
old_segno = curseg->segno;
SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
locate_dirty_segment(sbi, old_segno);
@@ -2806,7 +2980,7 @@ next:
mutex_lock(&dcc->cmd_lock);
if (unlikely(dcc->rbtree_check))
f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
- &dcc->root));
+ &dcc->root, false));
dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
NULL, start,
@@ -2930,12 +3104,11 @@ out:
return err;
}
-static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
+static bool __has_curseg_space(struct f2fs_sb_info *sbi,
+ struct curseg_info *curseg)
{
- struct curseg_info *curseg = CURSEG_I(sbi, type);
- if (curseg->next_blkoff < sbi->blocks_per_seg)
- return true;
- return false;
+ return curseg->next_blkoff < f2fs_usable_blks_in_seg(sbi,
+ curseg->segno);
}
int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
@@ -3075,8 +3248,13 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
if (fio->type == DATA) {
struct inode *inode = fio->page->mapping->host;
- if (is_cold_data(fio->page) || file_is_cold(inode) ||
- f2fs_compressed_file(inode))
+ if (is_cold_data(fio->page)) {
+ if (fio->sbi->am.atgc_enabled)
+ return CURSEG_ALL_DATA_ATGC;
+ else
+ return CURSEG_COLD_DATA;
+ }
+ if (file_is_cold(inode) || f2fs_compressed_file(inode))
return CURSEG_COLD_DATA;
if (file_is_hot(inode) ||
is_inode_flag_set(inode, FI_HOT_DATA) ||
@@ -3126,27 +3304,25 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, type);
- bool put_pin_sem = false;
-
- if (type == CURSEG_COLD_DATA) {
- /* GC during CURSEG_COLD_DATA_PINNED allocation */
- if (down_read_trylock(&sbi->pin_sem)) {
- put_pin_sem = true;
- } else {
- type = CURSEG_WARM_DATA;
- curseg = CURSEG_I(sbi, type);
- }
- } else if (type == CURSEG_COLD_DATA_PINNED) {
- type = CURSEG_COLD_DATA;
- }
+ unsigned long long old_mtime;
+ bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
+ struct seg_entry *se = NULL;
down_read(&SM_I(sbi)->curseg_lock);
mutex_lock(&curseg->curseg_mutex);
down_write(&sit_i->sentry_lock);
+ if (from_gc) {
+ f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO);
+ se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr));
+ sanity_check_seg_type(sbi, se->type);
+ f2fs_bug_on(sbi, IS_NODESEG(se->type));
+ }
*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
+ f2fs_bug_on(sbi, curseg->next_blkoff >= sbi->blocks_per_seg);
+
f2fs_wait_discard_bio(sbi, *new_blkaddr);
/*
@@ -3160,6 +3336,14 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
stat_inc_block_count(sbi, curseg);
+ if (from_gc) {
+ old_mtime = get_segment_mtime(sbi, old_blkaddr);
+ } else {
+ update_segment_mtime(sbi, old_blkaddr, 0);
+ old_mtime = 0;
+ }
+ update_segment_mtime(sbi, *new_blkaddr, old_mtime);
+
/*
* SIT information should be updated before segment allocation,
* since SSR needs latest valid block information.
@@ -3168,9 +3352,13 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
update_sit_entry(sbi, old_blkaddr, -1);
- if (!__has_curseg_space(sbi, type))
- sit_i->s_ops->allocate_segment(sbi, type, false);
-
+ if (!__has_curseg_space(sbi, curseg)) {
+ if (from_gc)
+ get_atssr_segment(sbi, type, se->type,
+ AT_SSR, se->mtime);
+ else
+ sit_i->s_ops->allocate_segment(sbi, type, false);
+ }
/*
* segment dirty status should be updated after segment allocation,
* so we just need to update status only one time after previous
@@ -3204,9 +3392,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
mutex_unlock(&curseg->curseg_mutex);
up_read(&SM_I(sbi)->curseg_lock);
-
- if (put_pin_sem)
- up_read(&sbi->pin_sem);
}
static void update_device_state(struct f2fs_io_info *fio)
@@ -3355,7 +3540,8 @@ static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
block_t old_blkaddr, block_t new_blkaddr,
- bool recover_curseg, bool recover_newaddr)
+ bool recover_curseg, bool recover_newaddr,
+ bool from_gc)
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg;
@@ -3400,17 +3586,22 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
/* change the current segment */
if (segno != curseg->segno) {
curseg->next_segno = segno;
- change_curseg(sbi, type);
+ change_curseg(sbi, type, true);
}
curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
__add_sum_entry(sbi, type, sum);
- if (!recover_curseg || recover_newaddr)
+ if (!recover_curseg || recover_newaddr) {
+ if (!from_gc)
+ update_segment_mtime(sbi, new_blkaddr, 0);
update_sit_entry(sbi, new_blkaddr, 1);
+ }
if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
invalidate_mapping_pages(META_MAPPING(sbi),
old_blkaddr, old_blkaddr);
+ if (!from_gc)
+ update_segment_mtime(sbi, old_blkaddr, 0);
update_sit_entry(sbi, old_blkaddr, -1);
}
@@ -3422,7 +3613,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
if (recover_curseg) {
if (old_cursegno != curseg->segno) {
curseg->next_segno = old_cursegno;
- change_curseg(sbi, type);
+ change_curseg(sbi, type, true);
}
curseg->next_blkoff = old_blkoff;
}
@@ -3442,7 +3633,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
set_summary(&sum, dn->nid, dn->ofs_in_node, version);
f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
- recover_curseg, recover_newaddr);
+ recover_curseg, recover_newaddr, false);
f2fs_update_data_blkaddr(dn, new_addr);
}
@@ -3579,7 +3770,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
CURSEG_HOT_DATA]);
if (__exist_node_summaries(sbi))
- blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
+ blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
else
blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
} else {
@@ -3657,8 +3848,9 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
}
if (__exist_node_summaries(sbi))
- f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
- NR_CURSEG_TYPE - type, META_CP, true);
+ f2fs_ra_meta_pages(sbi,
+ sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
+ NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
for (; type <= CURSEG_COLD_NODE; type++) {
err = read_normal_summaries(sbi, type);
@@ -3786,7 +3978,7 @@ int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
unsigned int segno)
{
- return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
+ return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
}
static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
@@ -4160,14 +4352,14 @@ static int build_curseg(struct f2fs_sb_info *sbi)
struct curseg_info *array;
int i;
- array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
- GFP_KERNEL);
+ array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
+ sizeof(*array)), GFP_KERNEL);
if (!array)
return -ENOMEM;
SM_I(sbi)->curseg_array = array;
- for (i = 0; i < NR_CURSEG_TYPE; i++) {
+ for (i = 0; i < NO_CHECK_TYPE; i++) {
mutex_init(&array[i].curseg_mutex);
array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
if (!array[i].sum_blk)
@@ -4177,8 +4369,15 @@ static int build_curseg(struct f2fs_sb_info *sbi)
sizeof(struct f2fs_journal), GFP_KERNEL);
if (!array[i].journal)
return -ENOMEM;
+ if (i < NR_PERSISTENT_LOG)
+ array[i].seg_type = CURSEG_HOT_DATA + i;
+ else if (i == CURSEG_COLD_DATA_PINNED)
+ array[i].seg_type = CURSEG_COLD_DATA;
+ else if (i == CURSEG_ALL_DATA_ATGC)
+ array[i].seg_type = CURSEG_COLD_DATA;
array[i].segno = NULL_SEGNO;
array[i].next_blkoff = 0;
+ array[i].inited = false;
}
return restore_curseg_summaries(sbi);
}
@@ -4299,9 +4498,12 @@ static void init_free_segmap(struct f2fs_sb_info *sbi)
{
unsigned int start;
int type;
+ struct seg_entry *sentry;
for (start = 0; start < MAIN_SEGS(sbi); start++) {
- struct seg_entry *sentry = get_seg_entry(sbi, start);
+ if (f2fs_usable_blks_in_seg(sbi, start) == 0)
+ continue;
+ sentry = get_seg_entry(sbi, start);
if (!sentry->valid_blocks)
__set_free(sbi, start);
else
@@ -4321,7 +4523,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int segno = 0, offset = 0, secno;
- block_t valid_blocks;
+ block_t valid_blocks, usable_blks_in_seg;
block_t blks_per_sec = BLKS_PER_SEC(sbi);
while (1) {
@@ -4331,9 +4533,10 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
break;
offset = segno + 1;
valid_blocks = get_valid_blocks(sbi, segno, false);
- if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
+ usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
+ if (valid_blocks == usable_blks_in_seg || !valid_blocks)
continue;
- if (valid_blocks > sbi->blocks_per_seg) {
+ if (valid_blocks > usable_blks_in_seg) {
f2fs_bug_on(sbi, 1);
continue;
}
@@ -4413,11 +4616,13 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
* In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
* In LFS curseg, all blkaddr after .next_blkoff should be unused.
*/
- for (i = 0; i < NO_CHECK_TYPE; i++) {
+ for (i = 0; i < NR_PERSISTENT_LOG; i++) {
struct curseg_info *curseg = CURSEG_I(sbi, i);
struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
unsigned int blkofs = curseg->next_blkoff;
+ sanity_check_seg_type(sbi, curseg->seg_type);
+
if (f2fs_test_bit(blkofs, se->cur_valid_map))
goto out;
@@ -4642,7 +4847,7 @@ int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
{
int i, ret;
- for (i = 0; i < NO_CHECK_TYPE; i++) {
+ for (i = 0; i < NR_PERSISTENT_LOG; i++) {
ret = fix_curseg_write_pointer(sbi, i);
if (ret)
return ret;
@@ -4683,6 +4888,101 @@ int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
return 0;
}
+
+static bool is_conv_zone(struct f2fs_sb_info *sbi, unsigned int zone_idx,
+ unsigned int dev_idx)
+{
+ if (!bdev_is_zoned(FDEV(dev_idx).bdev))
+ return true;
+ return !test_bit(zone_idx, FDEV(dev_idx).blkz_seq);
+}
+
+/* Return the zone index in the given device */
+static unsigned int get_zone_idx(struct f2fs_sb_info *sbi, unsigned int secno,
+ int dev_idx)
+{
+ block_t sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
+
+ return (sec_start_blkaddr - FDEV(dev_idx).start_blk) >>
+ sbi->log_blocks_per_blkz;
+}
+
+/*
+ * Return the usable segments in a section based on the zone's
+ * corresponding zone capacity. Zone is equal to a section.
+ */
+static inline unsigned int f2fs_usable_zone_segs_in_sec(
+ struct f2fs_sb_info *sbi, unsigned int segno)
+{
+ unsigned int dev_idx, zone_idx, unusable_segs_in_sec;
+
+ dev_idx = f2fs_target_device_index(sbi, START_BLOCK(sbi, segno));
+ zone_idx = get_zone_idx(sbi, GET_SEC_FROM_SEG(sbi, segno), dev_idx);
+
+ /* Conventional zone's capacity is always equal to zone size */
+ if (is_conv_zone(sbi, zone_idx, dev_idx))
+ return sbi->segs_per_sec;
+
+ /*
+ * If the zone_capacity_blocks array is NULL, then zone capacity
+ * is equal to the zone size for all zones
+ */
+ if (!FDEV(dev_idx).zone_capacity_blocks)
+ return sbi->segs_per_sec;
+
+ /* Get the segment count beyond zone capacity block */
+ unusable_segs_in_sec = (sbi->blocks_per_blkz -
+ FDEV(dev_idx).zone_capacity_blocks[zone_idx]) >>
+ sbi->log_blocks_per_seg;
+ return sbi->segs_per_sec - unusable_segs_in_sec;
+}
+
+/*
+ * Return the number of usable blocks in a segment. The number of blocks
+ * returned is always equal to the number of blocks in a segment for
+ * segments fully contained within a sequential zone capacity or a
+ * conventional zone. For segments partially contained in a sequential
+ * zone capacity, the number of usable blocks up to the zone capacity
+ * is returned. 0 is returned in all other cases.
+ */
+static inline unsigned int f2fs_usable_zone_blks_in_seg(
+ struct f2fs_sb_info *sbi, unsigned int segno)
+{
+ block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr;
+ unsigned int zone_idx, dev_idx, secno;
+
+ secno = GET_SEC_FROM_SEG(sbi, segno);
+ seg_start = START_BLOCK(sbi, segno);
+ dev_idx = f2fs_target_device_index(sbi, seg_start);
+ zone_idx = get_zone_idx(sbi, secno, dev_idx);
+
+ /*
+ * Conventional zone's capacity is always equal to zone size,
+ * so, blocks per segment is unchanged.
+ */
+ if (is_conv_zone(sbi, zone_idx, dev_idx))
+ return sbi->blocks_per_seg;
+
+ if (!FDEV(dev_idx).zone_capacity_blocks)
+ return sbi->blocks_per_seg;
+
+ sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
+ sec_cap_blkaddr = sec_start_blkaddr +
+ FDEV(dev_idx).zone_capacity_blocks[zone_idx];
+
+ /*
+ * If segment starts before zone capacity and spans beyond
+ * zone capacity, then usable blocks are from seg start to
+ * zone capacity. If the segment starts after the zone capacity,
+ * then there are no usable blocks.
+ */
+ if (seg_start >= sec_cap_blkaddr)
+ return 0;
+ if (seg_start + sbi->blocks_per_seg > sec_cap_blkaddr)
+ return sec_cap_blkaddr - seg_start;
+
+ return sbi->blocks_per_seg;
+}
#else
int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
{
@@ -4693,7 +4993,36 @@ int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
{
return 0;
}
+
+static inline unsigned int f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ return 0;
+}
+
+static inline unsigned int f2fs_usable_zone_segs_in_sec(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ return 0;
+}
#endif
+unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ if (f2fs_sb_has_blkzoned(sbi))
+ return f2fs_usable_zone_blks_in_seg(sbi, segno);
+
+ return sbi->blocks_per_seg;
+}
+
+unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ if (f2fs_sb_has_blkzoned(sbi))
+ return f2fs_usable_zone_segs_in_sec(sbi, segno);
+
+ return sbi->segs_per_sec;
+}
/*
* Update min, max modified time for cost-benefit GC algorithm
@@ -4720,6 +5049,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi)
sit_i->min_mtime = mtime;
}
sit_i->max_mtime = get_mtime(sbi, false);
+ sit_i->dirty_max_mtime = 0;
up_write(&sit_i->sentry_lock);
}
@@ -4835,7 +5165,7 @@ static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
destroy_victim_secmap(sbi);
SM_I(sbi)->dirty_info = NULL;
- kvfree(dirty_i);
+ kfree(dirty_i);
}
static void destroy_curseg(struct f2fs_sb_info *sbi)
@@ -4847,10 +5177,10 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
return;
SM_I(sbi)->curseg_array = NULL;
for (i = 0; i < NR_CURSEG_TYPE; i++) {
- kvfree(array[i].sum_blk);
- kvfree(array[i].journal);
+ kfree(array[i].sum_blk);
+ kfree(array[i].journal);
}
- kvfree(array);
+ kfree(array);
}
static void destroy_free_segmap(struct f2fs_sb_info *sbi)
@@ -4861,7 +5191,7 @@ static void destroy_free_segmap(struct f2fs_sb_info *sbi)
SM_I(sbi)->free_info = NULL;
kvfree(free_i->free_segmap);
kvfree(free_i->free_secmap);
- kvfree(free_i);
+ kfree(free_i);
}
static void destroy_sit_info(struct f2fs_sb_info *sbi)
@@ -4873,7 +5203,7 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
if (sit_i->sentries)
kvfree(sit_i->bitmap);
- kvfree(sit_i->tmp_map);
+ kfree(sit_i->tmp_map);
kvfree(sit_i->sentries);
kvfree(sit_i->sec_entries);
@@ -4885,7 +5215,7 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
kvfree(sit_i->sit_bitmap_mir);
kvfree(sit_i->invalid_segmap);
#endif
- kvfree(sit_i);
+ kfree(sit_i);
}
void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
@@ -4901,7 +5231,7 @@ void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
destroy_free_segmap(sbi);
destroy_sit_info(sbi);
sbi->sm_info = NULL;
- kvfree(sm_info);
+ kfree(sm_info);
}
int __init f2fs_create_segment_manager_caches(void)
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 752b177073b2..e81eb0748e2a 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -16,13 +16,20 @@
#define DEF_MAX_RECLAIM_PREFREE_SEGMENTS 4096 /* 8GB in maximum */
#define F2FS_MIN_SEGMENTS 9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */
+#define F2FS_MIN_META_SEGMENTS 8 /* SB + 2 (CP + SIT + NAT) + SSA */
/* L: Logical segment # in volume, R: Relative segment # in main area */
#define GET_L2R_SEGNO(free_i, segno) ((segno) - (free_i)->start_segno)
#define GET_R2L_SEGNO(free_i, segno) ((segno) + (free_i)->start_segno)
#define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA)
-#define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE)
+#define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
+
+static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
+ unsigned short seg_type)
+{
+ f2fs_bug_on(sbi, seg_type >= NR_PERSISTENT_LOG);
+}
#define IS_HOT(t) ((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA)
#define IS_WARM(t) ((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA)
@@ -34,7 +41,9 @@
((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \
- ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
+ ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno) || \
+ ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno) || \
+ ((seg) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno))
#define IS_CURSEC(sbi, secno) \
(((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \
@@ -48,7 +57,11 @@
((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \
(sbi)->segs_per_sec) || \
((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \
- (sbi)->segs_per_sec)) \
+ (sbi)->segs_per_sec) || \
+ ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno / \
+ (sbi)->segs_per_sec) || \
+ ((secno) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno / \
+ (sbi)->segs_per_sec))
#define MAIN_BLKADDR(sbi) \
(SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \
@@ -132,20 +145,25 @@ enum {
* In the victim_sel_policy->alloc_mode, there are two block allocation modes.
* LFS writes data sequentially with cleaning operations.
* SSR (Slack Space Recycle) reuses obsolete space without cleaning operations.
+ * AT_SSR (Age Threshold based Slack Space Recycle) merges fragments into
+ * fragmented segment which has similar aging degree.
*/
enum {
LFS = 0,
- SSR
+ SSR,
+ AT_SSR,
};
/*
* In the victim_sel_policy->gc_mode, there are two gc, aka cleaning, modes.
* GC_CB is based on cost-benefit algorithm.
* GC_GREEDY is based on greedy algorithm.
+ * GC_AT is based on age-threshold algorithm.
*/
enum {
GC_CB = 0,
GC_GREEDY,
+ GC_AT,
ALLOC_NEXT,
FLUSH_DEVICE,
MAX_GC_POLICY,
@@ -174,7 +192,10 @@ struct victim_sel_policy {
unsigned int offset; /* last scanned bitmap offset */
unsigned int ofs_unit; /* bitmap search unit */
unsigned int min_cost; /* minimum cost */
+ unsigned long long oldest_age; /* oldest age of segments having the same min cost */
unsigned int min_segno; /* segment # having min. cost */
+ unsigned long long age; /* mtime of GCed section*/
+ unsigned long long age_threshold;/* age threshold */
};
struct seg_entry {
@@ -240,6 +261,8 @@ struct sit_info {
unsigned long long mounted_time; /* mount time */
unsigned long long min_mtime; /* min. modification time */
unsigned long long max_mtime; /* max. modification time */
+ unsigned long long dirty_min_mtime; /* rerange candidates in GC_AT */
+ unsigned long long dirty_max_mtime; /* rerange candidates in GC_AT */
unsigned int last_victim[MAX_GC_POLICY]; /* last victim segment # */
};
@@ -278,7 +301,7 @@ struct dirty_seglist_info {
/* victim selection function for cleaning and SSR */
struct victim_selection {
int (*get_victim)(struct f2fs_sb_info *, unsigned int *,
- int, int, char);
+ int, int, char, unsigned long long);
};
/* for active log information */
@@ -288,10 +311,12 @@ struct curseg_info {
struct rw_semaphore journal_rwsem; /* protect journal area */
struct f2fs_journal *journal; /* cached journal info */
unsigned char alloc_type; /* current allocation type */
+ unsigned short seg_type; /* segment type like CURSEG_XXX_TYPE */
unsigned int segno; /* current segment number */
unsigned short next_blkoff; /* next block offset to write */
unsigned int zone; /* current zone number */
unsigned int next_segno; /* preallocated segment */
+ bool inited; /* indicate inmem log is inited */
};
struct sit_entry_set {
@@ -305,8 +330,6 @@ struct sit_entry_set {
*/
static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type)
{
- if (type == CURSEG_COLD_DATA_PINNED)
- type = CURSEG_COLD_DATA;
return (struct curseg_info *)(SM_I(sbi)->curseg_array + type);
}
@@ -411,6 +434,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno);
unsigned int next;
+ unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno);
spin_lock(&free_i->segmap_lock);
clear_bit(segno, free_i->free_segmap);
@@ -418,7 +442,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
next = find_next_bit(free_i->free_segmap,
start_segno + sbi->segs_per_sec, start_segno);
- if (next >= start_segno + sbi->segs_per_sec) {
+ if (next >= start_segno + usable_segs) {
clear_bit(secno, free_i->free_secmap);
free_i->free_sections++;
}
@@ -438,22 +462,23 @@ static inline void __set_inuse(struct f2fs_sb_info *sbi,
}
static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
- unsigned int segno)
+ unsigned int segno, bool inmem)
{
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno);
unsigned int next;
+ unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno);
spin_lock(&free_i->segmap_lock);
if (test_and_clear_bit(segno, free_i->free_segmap)) {
free_i->free_segments++;
- if (IS_CURSEC(sbi, secno))
+ if (!inmem && IS_CURSEC(sbi, secno))
goto skip_free;
next = find_next_bit(free_i->free_segmap,
start_segno + sbi->segs_per_sec, start_segno);
- if (next >= start_segno + sbi->segs_per_sec) {
+ if (next >= start_segno + usable_segs) {
if (test_and_clear_bit(secno, free_i->free_secmap))
free_i->free_sections++;
}
@@ -500,7 +525,7 @@ static inline unsigned int free_segments(struct f2fs_sb_info *sbi)
return FREE_I(sbi)->free_segments;
}
-static inline int reserved_segments(struct f2fs_sb_info *sbi)
+static inline unsigned int reserved_segments(struct f2fs_sb_info *sbi)
{
return SM_I(sbi)->reserved_segments;
}
@@ -532,7 +557,7 @@ static inline int overprovision_segments(struct f2fs_sb_info *sbi)
static inline int reserved_sections(struct f2fs_sb_info *sbi)
{
- return GET_SEC_FROM_SEG(sbi, (unsigned int)reserved_segments(sbi));
+ return GET_SEC_FROM_SEG(sbi, reserved_segments(sbi));
}
static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi)
@@ -546,8 +571,8 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi)
/* check current node segment */
for (i = CURSEG_HOT_NODE; i <= CURSEG_COLD_NODE; i++) {
segno = CURSEG_I(sbi, i)->segno;
- left_blocks = sbi->blocks_per_seg -
- get_seg_entry(sbi, segno)->ckpt_valid_blocks;
+ left_blocks = f2fs_usable_blks_in_seg(sbi, segno) -
+ get_seg_entry(sbi, segno)->ckpt_valid_blocks;
if (node_blocks > left_blocks)
return false;
@@ -555,7 +580,7 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi)
/* check current data segment */
segno = CURSEG_I(sbi, CURSEG_HOT_DATA)->segno;
- left_blocks = sbi->blocks_per_seg -
+ left_blocks = f2fs_usable_blks_in_seg(sbi, segno) -
get_seg_entry(sbi, segno)->ckpt_valid_blocks;
if (dent_blocks > left_blocks)
return false;
@@ -677,21 +702,22 @@ static inline int check_block_count(struct f2fs_sb_info *sbi,
bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false;
int valid_blocks = 0;
int cur_pos = 0, next_pos;
+ unsigned int usable_blks_per_seg = f2fs_usable_blks_in_seg(sbi, segno);
/* check bitmap with valid block count */
do {
if (is_valid) {
next_pos = find_next_zero_bit_le(&raw_sit->valid_map,
- sbi->blocks_per_seg,
+ usable_blks_per_seg,
cur_pos);
valid_blocks += next_pos - cur_pos;
} else
next_pos = find_next_bit_le(&raw_sit->valid_map,
- sbi->blocks_per_seg,
+ usable_blks_per_seg,
cur_pos);
cur_pos = next_pos;
is_valid = !is_valid;
- } while (cur_pos < sbi->blocks_per_seg);
+ } while (cur_pos < usable_blks_per_seg);
if (unlikely(GET_SIT_VBLOCKS(raw_sit) != valid_blocks)) {
f2fs_err(sbi, "Mismatch valid blocks %d vs. %d",
@@ -700,8 +726,13 @@ static inline int check_block_count(struct f2fs_sb_info *sbi,
return -EFSCORRUPTED;
}
+ if (usable_blks_per_seg < sbi->blocks_per_seg)
+ f2fs_bug_on(sbi, find_next_bit_le(&raw_sit->valid_map,
+ sbi->blocks_per_seg,
+ usable_blks_per_seg) != sbi->blocks_per_seg);
+
/* check segment usage, and check boundary of a given segment number */
- if (unlikely(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg
+ if (unlikely(GET_SIT_VBLOCKS(raw_sit) > usable_blks_per_seg
|| segno > TOTAL_SEGS(sbi) - 1)) {
f2fs_err(sbi, "Wrong valid blocks %d or segno %u",
GET_SIT_VBLOCKS(raw_sit), segno);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 44847058936b..ddd06565cac6 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -146,6 +146,7 @@ enum {
Opt_compress_algorithm,
Opt_compress_log_size,
Opt_compress_extension,
+ Opt_atgc,
Opt_err,
};
@@ -213,6 +214,7 @@ static match_table_t f2fs_tokens = {
{Opt_compress_algorithm, "compress_algorithm=%s"},
{Opt_compress_log_size, "compress_log_size=%u"},
{Opt_compress_extension, "compress_extension=%s"},
+ {Opt_atgc, "atgc"},
{Opt_err, NULL},
};
@@ -580,7 +582,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
case Opt_active_logs:
if (args->from && match_int(args, &arg))
return -EINVAL;
- if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
+ if (arg != 2 && arg != 4 &&
+ arg != NR_CURSEG_PERSIST_TYPE)
return -EINVAL;
F2FS_OPTION(sbi).active_logs = arg;
break;
@@ -868,8 +871,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
#ifdef CONFIG_F2FS_FS_COMPRESSION
case Opt_compress_algorithm:
if (!f2fs_sb_has_compression(sbi)) {
- f2fs_err(sbi, "Compression feature if off");
- return -EINVAL;
+ f2fs_info(sbi, "Image doesn't support compression");
+ break;
}
name = match_strdup(&args[0]);
if (!name)
@@ -894,8 +897,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
break;
case Opt_compress_log_size:
if (!f2fs_sb_has_compression(sbi)) {
- f2fs_err(sbi, "Compression feature is off");
- return -EINVAL;
+ f2fs_info(sbi, "Image doesn't support compression");
+ break;
}
if (args->from && match_int(args, &arg))
return -EINVAL;
@@ -909,8 +912,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
break;
case Opt_compress_extension:
if (!f2fs_sb_has_compression(sbi)) {
- f2fs_err(sbi, "Compression feature is off");
- return -EINVAL;
+ f2fs_info(sbi, "Image doesn't support compression");
+ break;
}
name = match_strdup(&args[0]);
if (!name)
@@ -938,6 +941,9 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
f2fs_info(sbi, "compression options not supported");
break;
#endif
+ case Opt_atgc:
+ set_opt(sbi, ATGC);
+ break;
default:
f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
p);
@@ -964,6 +970,17 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
return -EINVAL;
}
#endif
+ /*
+ * The BLKZONED feature indicates that the drive was formatted with
+ * zone alignment optimization. This is optional for host-aware
+ * devices, but mandatory for host-managed zoned block devices.
+ */
+#ifndef CONFIG_BLK_DEV_ZONED
+ if (f2fs_sb_has_blkzoned(sbi)) {
+ f2fs_err(sbi, "Zoned block device support is not enabled");
+ return -EINVAL;
+ }
+#endif
if (F2FS_IO_SIZE_BITS(sbi) && !f2fs_lfs_mode(sbi)) {
f2fs_err(sbi, "Should set mode=lfs with %uKB-sized IO",
@@ -1001,7 +1018,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
}
/* Not pass down write hints if the number of active logs is lesser
- * than NR_CURSEG_TYPE.
+ * than NR_CURSEG_PERSIST_TYPE.
*/
if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE)
F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
@@ -1020,6 +1037,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
/* Initialize f2fs-specific inode info */
atomic_set(&fi->dirty_pages, 0);
+ atomic_set(&fi->i_compr_blocks, 0);
init_rwsem(&fi->i_sem);
spin_lock_init(&fi->i_size_lock);
INIT_LIST_HEAD(&fi->dirty_list);
@@ -1184,6 +1202,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
blkdev_put(FDEV(i).bdev, FMODE_EXCL);
#ifdef CONFIG_BLK_DEV_ZONED
kvfree(FDEV(i).blkz_seq);
+ kfree(FDEV(i).zone_capacity_blocks);
#endif
}
kvfree(sbi->devs);
@@ -1269,6 +1288,7 @@ static void f2fs_put_super(struct super_block *sb)
kfree(sbi->raw_super);
destroy_device_list(sbi);
+ f2fs_destroy_page_array_cache(sbi);
f2fs_destroy_xattr_caches(sbi);
mempool_destroy(sbi->write_io_dummy);
#ifdef CONFIG_QUOTA
@@ -1280,7 +1300,7 @@ static void f2fs_put_super(struct super_block *sb)
for (i = 0; i < NR_PAGE_TYPE; i++)
kvfree(sbi->write_io[i]);
#ifdef CONFIG_UNICODE
- utf8_unload(sbi->s_encoding);
+ utf8_unload(sb->s_encoding);
#endif
kfree(sbi);
}
@@ -1634,13 +1654,16 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
#ifdef CONFIG_F2FS_FS_COMPRESSION
f2fs_show_compress_options(seq, sbi->sb);
#endif
+
+ if (test_opt(sbi, ATGC))
+ seq_puts(seq, ",atgc");
return 0;
}
static void default_options(struct f2fs_sb_info *sbi)
{
/* init some FS parameters */
- F2FS_OPTION(sbi).active_logs = NR_CURSEG_TYPE;
+ F2FS_OPTION(sbi).active_logs = NR_CURSEG_PERSIST_TYPE;
F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
@@ -1763,6 +1786,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
bool no_io_align = !F2FS_IO_ALIGNED(sbi);
+ bool no_atgc = !test_opt(sbi, ATGC);
bool checkpoint_changed;
#ifdef CONFIG_QUOTA
int i, j;
@@ -1835,6 +1859,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
}
}
#endif
+ /* disallow enable atgc dynamically */
+ if (no_atgc == !!test_opt(sbi, ATGC)) {
+ err = -EINVAL;
+ f2fs_warn(sbi, "switch atgc option is not allowed");
+ goto restore_opts;
+ }
+
/* disallow enable/disable extent_cache dynamically */
if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) {
err = -EINVAL;
@@ -2679,10 +2710,8 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
}
if (main_end_blkaddr > seg_end_blkaddr) {
- f2fs_info(sbi, "Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)",
- main_blkaddr,
- segment0_blkaddr +
- (segment_count << log_blocks_per_seg),
+ f2fs_info(sbi, "Wrong MAIN_AREA boundary, start(%u) end(%llu) block(%u)",
+ main_blkaddr, seg_end_blkaddr,
segment_count_main << log_blocks_per_seg);
return true;
} else if (main_end_blkaddr < seg_end_blkaddr) {
@@ -2700,10 +2729,8 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
err = __f2fs_commit_super(bh, NULL);
res = err ? "failed" : "done";
}
- f2fs_info(sbi, "Fix alignment : %s, start(%u) end(%u) block(%u)",
- res, main_blkaddr,
- segment0_blkaddr +
- (segment_count << log_blocks_per_seg),
+ f2fs_info(sbi, "Fix alignment : %s, start(%u) end(%llu) block(%u)",
+ res, main_blkaddr, seg_end_blkaddr,
segment_count_main << log_blocks_per_seg);
if (err)
return true;
@@ -2714,7 +2741,7 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
struct buffer_head *bh)
{
- block_t segment_count, segs_per_sec, secs_per_zone;
+ block_t segment_count, segs_per_sec, secs_per_zone, segment_count_main;
block_t total_sections, blocks_per_seg;
struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
(bh->b_data + F2FS_SUPER_OFFSET);
@@ -2785,6 +2812,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
}
segment_count = le32_to_cpu(raw_super->segment_count);
+ segment_count_main = le32_to_cpu(raw_super->segment_count_main);
segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
total_sections = le32_to_cpu(raw_super->section_count);
@@ -2798,14 +2826,19 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
return -EFSCORRUPTED;
}
- if (total_sections > segment_count ||
- total_sections < F2FS_MIN_SEGMENTS ||
+ if (total_sections > segment_count_main || total_sections < 1 ||
segs_per_sec > segment_count || !segs_per_sec) {
f2fs_info(sbi, "Invalid segment/section count (%u, %u x %u)",
segment_count, total_sections, segs_per_sec);
return -EFSCORRUPTED;
}
+ if (segment_count_main != total_sections * segs_per_sec) {
+ f2fs_info(sbi, "Invalid segment/section count (%u != %u * %u)",
+ segment_count_main, total_sections, segs_per_sec);
+ return -EFSCORRUPTED;
+ }
+
if ((segment_count / segs_per_sec) < total_sections) {
f2fs_info(sbi, "Small segment_count (%u < %u * %u)",
segment_count, segs_per_sec, total_sections);
@@ -2831,6 +2864,12 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
segment_count, dev_seg_count);
return -EFSCORRUPTED;
}
+ } else {
+ if (__F2FS_HAS_FEATURE(raw_super, F2FS_FEATURE_BLKZONED) &&
+ !bdev_is_zoned(sbi->sb->s_bdev)) {
+ f2fs_info(sbi, "Zoned block device path is missing");
+ return -EFSCORRUPTED;
+ }
}
if (secs_per_zone > total_sections || !secs_per_zone) {
@@ -2906,7 +2945,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
- if (unlikely(fsmeta < F2FS_MIN_SEGMENTS ||
+ if (unlikely(fsmeta < F2FS_MIN_META_SEGMENTS ||
ovp_segments == 0 || reserved_segments == 0)) {
f2fs_err(sbi, "Wrong layout: check mkfs.f2fs version");
return 1;
@@ -2994,7 +3033,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
cp_payload = __cp_payload(sbi);
if (cp_pack_start_sum < cp_payload + 1 ||
cp_pack_start_sum > blocks_per_seg - 1 -
- NR_CURSEG_TYPE) {
+ NR_CURSEG_PERSIST_TYPE) {
f2fs_err(sbi, "Wrong cp_pack_start_sum: %u",
cp_pack_start_sum);
return 1;
@@ -3096,13 +3135,26 @@ static int init_percpu_info(struct f2fs_sb_info *sbi)
}
#ifdef CONFIG_BLK_DEV_ZONED
+
+struct f2fs_report_zones_args {
+ struct f2fs_dev_info *dev;
+ bool zone_cap_mismatch;
+};
+
static int f2fs_report_zone_cb(struct blk_zone *zone, unsigned int idx,
- void *data)
+ void *data)
{
- struct f2fs_dev_info *dev = data;
+ struct f2fs_report_zones_args *rz_args = data;
+
+ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+ return 0;
+
+ set_bit(idx, rz_args->dev->blkz_seq);
+ rz_args->dev->zone_capacity_blocks[idx] = zone->capacity >>
+ F2FS_LOG_SECTORS_PER_BLOCK;
+ if (zone->len != zone->capacity && !rz_args->zone_cap_mismatch)
+ rz_args->zone_cap_mismatch = true;
- if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL)
- set_bit(idx, dev->blkz_seq);
return 0;
}
@@ -3110,6 +3162,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
{
struct block_device *bdev = FDEV(devi).bdev;
sector_t nr_sectors = bdev->bd_part->nr_sects;
+ struct f2fs_report_zones_args rep_zone_arg;
int ret;
if (!f2fs_sb_has_blkzoned(sbi))
@@ -3135,12 +3188,26 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
if (!FDEV(devi).blkz_seq)
return -ENOMEM;
- /* Get block zones type */
+ /* Get block zones type and zone-capacity */
+ FDEV(devi).zone_capacity_blocks = f2fs_kzalloc(sbi,
+ FDEV(devi).nr_blkz * sizeof(block_t),
+ GFP_KERNEL);
+ if (!FDEV(devi).zone_capacity_blocks)
+ return -ENOMEM;
+
+ rep_zone_arg.dev = &FDEV(devi);
+ rep_zone_arg.zone_cap_mismatch = false;
+
ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES, f2fs_report_zone_cb,
- &FDEV(devi));
+ &rep_zone_arg);
if (ret < 0)
return ret;
+ if (!rep_zone_arg.zone_cap_mismatch) {
+ kfree(FDEV(devi).zone_capacity_blocks);
+ FDEV(devi).zone_capacity_blocks = NULL;
+ }
+
return 0;
}
#endif
@@ -3337,7 +3404,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
static int f2fs_setup_casefold(struct f2fs_sb_info *sbi)
{
#ifdef CONFIG_UNICODE
- if (f2fs_sb_has_casefold(sbi) && !sbi->s_encoding) {
+ if (f2fs_sb_has_casefold(sbi) && !sbi->sb->s_encoding) {
const struct f2fs_sb_encodings *encoding_info;
struct unicode_map *encoding;
__u16 encoding_flags;
@@ -3368,8 +3435,8 @@ static int f2fs_setup_casefold(struct f2fs_sb_info *sbi)
"%s-%s with flags 0x%hx", encoding_info->name,
encoding_info->version?:"\b", encoding_flags);
- sbi->s_encoding = encoding;
- sbi->s_encoding_flags = encoding_flags;
+ sbi->sb->s_encoding = encoding;
+ sbi->sb->s_encoding_flags = encoding_flags;
sbi->sb->s_d_op = &f2fs_dentry_ops;
}
#else
@@ -3448,18 +3515,6 @@ try_onemore:
sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid,
sizeof(raw_super->uuid));
- /*
- * The BLKZONED feature indicates that the drive was formatted with
- * zone alignment optimization. This is optional for host-aware
- * devices, but mandatory for host-managed zoned block devices.
- */
-#ifndef CONFIG_BLK_DEV_ZONED
- if (f2fs_sb_has_blkzoned(sbi)) {
- f2fs_err(sbi, "Zoned block device support is not enabled");
- err = -EOPNOTSUPP;
- goto free_sb_buf;
- }
-#endif
default_options(sbi);
/* parse mount options */
options = kstrdup((const char *)data, GFP_KERNEL);
@@ -3574,13 +3629,16 @@ try_onemore:
err = f2fs_init_xattr_caches(sbi);
if (err)
goto free_io_dummy;
+ err = f2fs_init_page_array_cache(sbi);
+ if (err)
+ goto free_xattr_cache;
/* get an inode for meta space */
sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
if (IS_ERR(sbi->meta_inode)) {
f2fs_err(sbi, "Failed to read F2FS meta data inode");
err = PTR_ERR(sbi->meta_inode);
- goto free_xattr_cache;
+ goto free_page_array_cache;
}
err = f2fs_get_valid_checkpoint(sbi);
@@ -3770,6 +3828,8 @@ try_onemore:
}
reset_checkpoint:
+ f2fs_init_inmem_curseg(sbi);
+
/* f2fs_recover_fsync_data() cleared this already */
clear_sbi_flag(sbi, SBI_POR_DOING);
@@ -3854,6 +3914,8 @@ free_meta_inode:
make_bad_inode(sbi->meta_inode);
iput(sbi->meta_inode);
sbi->meta_inode = NULL;
+free_page_array_cache:
+ f2fs_destroy_page_array_cache(sbi);
free_xattr_cache:
f2fs_destroy_xattr_caches(sbi);
free_io_dummy:
@@ -3865,7 +3927,7 @@ free_bio_info:
kvfree(sbi->write_io[i]);
#ifdef CONFIG_UNICODE
- utf8_unload(sbi->s_encoding);
+ utf8_unload(sb->s_encoding);
#endif
free_options:
#ifdef CONFIG_QUOTA
@@ -3975,9 +4037,12 @@ static int __init init_f2fs_fs(void)
err = f2fs_create_extent_cache();
if (err)
goto free_checkpoint_caches;
- err = f2fs_init_sysfs();
+ err = f2fs_create_garbage_collection_cache();
if (err)
goto free_extent_cache;
+ err = f2fs_init_sysfs();
+ if (err)
+ goto free_garbage_collection_cache;
err = register_shrinker(&f2fs_shrinker_info);
if (err)
goto free_sysfs;
@@ -3997,7 +4062,12 @@ static int __init init_f2fs_fs(void)
err = f2fs_init_compress_mempool();
if (err)
goto free_bioset;
+ err = f2fs_init_compress_cache();
+ if (err)
+ goto free_compress_mempool;
return 0;
+free_compress_mempool:
+ f2fs_destroy_compress_mempool();
free_bioset:
f2fs_destroy_bioset();
free_bio_enrty_cache:
@@ -4011,6 +4081,8 @@ free_shrinker:
unregister_shrinker(&f2fs_shrinker_info);
free_sysfs:
f2fs_exit_sysfs();
+free_garbage_collection_cache:
+ f2fs_destroy_garbage_collection_cache();
free_extent_cache:
f2fs_destroy_extent_cache();
free_checkpoint_caches:
@@ -4027,6 +4099,7 @@ fail:
static void __exit exit_f2fs_fs(void)
{
+ f2fs_destroy_compress_cache();
f2fs_destroy_compress_mempool();
f2fs_destroy_bioset();
f2fs_destroy_bio_entry_cache();
@@ -4035,6 +4108,7 @@ static void __exit exit_f2fs_fs(void)
unregister_filesystem(&f2fs_fs_type);
unregister_shrinker(&f2fs_shrinker_info);
f2fs_exit_sysfs();
+ f2fs_destroy_garbage_collection_cache();
f2fs_destroy_extent_cache();
f2fs_destroy_checkpoint_caches();
f2fs_destroy_segment_manager_caches();
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 88ed9969cc86..ec77ccfea923 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -176,12 +176,14 @@ static ssize_t encoding_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
#ifdef CONFIG_UNICODE
+ struct super_block *sb = sbi->sb;
+
if (f2fs_sb_has_casefold(sbi))
return snprintf(buf, PAGE_SIZE, "%s (%d.%d.%d)\n",
- sbi->s_encoding->charset,
- (sbi->s_encoding->version >> 16) & 0xff,
- (sbi->s_encoding->version >> 8) & 0xff,
- sbi->s_encoding->version & 0xff);
+ sb->s_encoding->charset,
+ (sb->s_encoding->version >> 16) & 0xff,
+ (sb->s_encoding->version >> 8) & 0xff,
+ sb->s_encoding->version & 0xff);
#endif
return sprintf(buf, "(none)");
}
@@ -375,12 +377,17 @@ out:
return count;
}
if (!strcmp(a->attr.name, "gc_idle")) {
- if (t == GC_IDLE_CB)
+ if (t == GC_IDLE_CB) {
sbi->gc_mode = GC_IDLE_CB;
- else if (t == GC_IDLE_GREEDY)
+ } else if (t == GC_IDLE_GREEDY) {
sbi->gc_mode = GC_IDLE_GREEDY;
- else
+ } else if (t == GC_IDLE_AT) {
+ if (!sbi->am.atgc_enabled)
+ return -EINVAL;
+ sbi->gc_mode = GC_AT;
+ } else {
sbi->gc_mode = GC_NORMAL;
+ }
return count;
}
@@ -968,4 +975,5 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi)
}
kobject_del(&sbi->s_kobj);
kobject_put(&sbi->s_kobj);
+ wait_for_completion(&sbi->s_kobj_unregister);
}
diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c
index 9eb0dba851e8..054ec852b5ea 100644
--- a/fs/f2fs/verity.c
+++ b/fs/f2fs/verity.c
@@ -228,6 +228,7 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
pgoff_t index,
unsigned long num_ra_pages)
{
+ DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, index);
struct page *page;
index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT;
@@ -237,8 +238,7 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
if (page)
put_page(page);
else if (num_ra_pages > 1)
- page_cache_readahead_unbounded(inode->i_mapping, NULL,
- index, num_ra_pages, 0);
+ page_cache_ra_unbounded(&ractl, num_ra_pages, 0);
page = read_mapping_page(inode->i_mapping, index, NULL);
}
return page;
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 1b0736ce0918..65afcc3cc68a 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -39,7 +39,7 @@ static void xattr_free(struct f2fs_sb_info *sbi, void *xattr_addr,
if (is_inline)
kmem_cache_free(sbi->inline_xattr_slab, xattr_addr);
else
- kvfree(xattr_addr);
+ kfree(xattr_addr);
}
static int f2fs_xattr_generic_get(const struct xattr_handler *handler,
@@ -425,7 +425,7 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage,
*base_addr = txattr_addr;
return 0;
fail:
- kvfree(txattr_addr);
+ kfree(txattr_addr);
return err;
}
@@ -610,7 +610,7 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
}
error = buffer_size - rest;
cleanup:
- kvfree(base_addr);
+ kfree(base_addr);
return error;
}
@@ -750,7 +750,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
if (!error && S_ISDIR(inode->i_mode))
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP);
exit:
- kvfree(base_addr);
+ kfree(base_addr);
return error;
}
diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig
index 774b2618018a..40ce9a1c12e5 100644
--- a/fs/fuse/Kconfig
+++ b/fs/fuse/Kconfig
@@ -8,7 +8,7 @@ config FUSE_FS
There's also a companion library: libfuse2. This library is available
from the FUSE homepage:
- <http://fuse.sourceforge.net/>
+ <https://github.com/libfuse/>
although chances are your distribution already has that library
installed if you've installed the "fuse" package itself.
@@ -38,3 +38,17 @@ config VIRTIO_FS
If you want to share files between guests or with the host, answer Y
or M.
+
+config FUSE_DAX
+ bool "Virtio Filesystem Direct Host Memory Access support"
+ default y
+ select INTERVAL_TREE
+ depends on VIRTIO_FS
+ depends on FS_DAX
+ depends on DAX_DRIVER
+ help
+ This allows bypassing guest page cache and allows mapping host page
+ cache directly in guest address space.
+
+ If you want to allow mounting a Virtio Filesystem with the "dax"
+ option, answer Y.
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 3e8cebfb59b7..8c7021fb2cd4 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -7,5 +7,7 @@ obj-$(CONFIG_FUSE_FS) += fuse.o
obj-$(CONFIG_CUSE) += cuse.o
obj-$(CONFIG_VIRTIO_FS) += virtiofs.o
-fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o
-virtiofs-y += virtio_fs.o
+fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o
+fuse-$(CONFIG_FUSE_DAX) += dax.o
+
+virtiofs-y := virtio_fs.o
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index a1303ad303ba..cc7e94d73c6c 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -164,6 +164,7 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
{
unsigned val;
struct fuse_conn *fc;
+ struct fuse_mount *fm;
ssize_t ret;
ret = fuse_conn_limit_write(file, buf, count, ppos, &val,
@@ -174,18 +175,27 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
if (!fc)
goto out;
+ down_read(&fc->killsb);
spin_lock(&fc->bg_lock);
fc->congestion_threshold = val;
- if (fc->sb) {
+
+ /*
+ * Get any fuse_mount belonging to this fuse_conn; s_bdi is
+ * shared between all of them
+ */
+
+ if (!list_empty(&fc->mounts)) {
+ fm = list_first_entry(&fc->mounts, struct fuse_mount, fc_entry);
if (fc->num_background < fc->congestion_threshold) {
- clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
- clear_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
+ clear_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
+ clear_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
} else {
- set_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
- set_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
+ set_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
+ set_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
}
}
spin_unlock(&fc->bg_lock);
+ up_read(&fc->killsb);
fuse_conn_put(fc);
out:
return ret;
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 2cc17816d7b1..45082269e698 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -57,6 +57,7 @@
struct cuse_conn {
struct list_head list; /* linked on cuse_conntbl */
+ struct fuse_mount fm; /* Dummy mount referencing fc */
struct fuse_conn fc; /* fuse connection */
struct cdev *cdev; /* associated character device */
struct device *dev; /* device representing @cdev */
@@ -134,7 +135,7 @@ static int cuse_open(struct inode *inode, struct file *file)
* Generic permission check is already done against the chrdev
* file, proceed to open.
*/
- rc = fuse_do_open(&cc->fc, 0, file, 0);
+ rc = fuse_do_open(&cc->fm, 0, file, 0);
if (rc)
fuse_conn_put(&cc->fc);
return rc;
@@ -143,10 +144,10 @@ static int cuse_open(struct inode *inode, struct file *file)
static int cuse_release(struct inode *inode, struct file *file)
{
struct fuse_file *ff = file->private_data;
- struct fuse_conn *fc = ff->fc;
+ struct fuse_mount *fm = ff->fm;
fuse_sync_release(NULL, ff, file->f_flags);
- fuse_conn_put(fc);
+ fuse_conn_put(fm->fc);
return 0;
}
@@ -155,7 +156,7 @@ static long cuse_file_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct fuse_file *ff = file->private_data;
- struct cuse_conn *cc = fc_to_cc(ff->fc);
+ struct cuse_conn *cc = fc_to_cc(ff->fm->fc);
unsigned int flags = 0;
if (cc->unrestricted_ioctl)
@@ -168,7 +169,7 @@ static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct fuse_file *ff = file->private_data;
- struct cuse_conn *cc = fc_to_cc(ff->fc);
+ struct cuse_conn *cc = fc_to_cc(ff->fm->fc);
unsigned int flags = FUSE_IOCTL_COMPAT;
if (cc->unrestricted_ioctl)
@@ -313,9 +314,10 @@ struct cuse_init_args {
* required data structures for it. Please read the comment at the
* top of this file for high level overview.
*/
-static void cuse_process_init_reply(struct fuse_conn *fc,
+static void cuse_process_init_reply(struct fuse_mount *fm,
struct fuse_args *args, int error)
{
+ struct fuse_conn *fc = fm->fc;
struct cuse_init_args *ia = container_of(args, typeof(*ia), ap.args);
struct fuse_args_pages *ap = &ia->ap;
struct cuse_conn *cc = fc_to_cc(fc), *pos;
@@ -424,7 +426,7 @@ static int cuse_send_init(struct cuse_conn *cc)
{
int rc;
struct page *page;
- struct fuse_conn *fc = &cc->fc;
+ struct fuse_mount *fm = &cc->fm;
struct cuse_init_args *ia;
struct fuse_args_pages *ap;
@@ -460,7 +462,7 @@ static int cuse_send_init(struct cuse_conn *cc)
ia->desc.length = ap->args.out_args[1].size;
ap->args.end = cuse_process_init_reply;
- rc = fuse_simple_background(fc, &ap->args, GFP_KERNEL);
+ rc = fuse_simple_background(fm, &ap->args, GFP_KERNEL);
if (rc) {
kfree(ia);
err_free_page:
@@ -506,7 +508,8 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
* Limit the cuse channel to requests that can
* be represented in file->f_cred->user_ns.
*/
- fuse_conn_init(&cc->fc, file->f_cred->user_ns, &fuse_dev_fiq_ops, NULL);
+ fuse_conn_init(&cc->fc, &cc->fm, file->f_cred->user_ns,
+ &fuse_dev_fiq_ops, NULL);
fud = fuse_dev_alloc_install(&cc->fc);
if (!fud) {
diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
new file mode 100644
index 000000000000..ff99ab2a3c43
--- /dev/null
+++ b/fs/fuse/dax.c
@@ -0,0 +1,1365 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * dax: direct host memory access
+ * Copyright (C) 2020 Red Hat, Inc.
+ */
+
+#include "fuse_i.h"
+
+#include <linux/delay.h>
+#include <linux/dax.h>
+#include <linux/uio.h>
+#include <linux/pfn_t.h>
+#include <linux/iomap.h>
+#include <linux/interval_tree.h>
+
+/*
+ * Default memory range size. A power of 2 so it agrees with common FUSE_INIT
+ * map_alignment values 4KB and 64KB.
+ */
+#define FUSE_DAX_SHIFT 21
+#define FUSE_DAX_SZ (1 << FUSE_DAX_SHIFT)
+#define FUSE_DAX_PAGES (FUSE_DAX_SZ / PAGE_SIZE)
+
+/* Number of ranges reclaimer will try to free in one invocation */
+#define FUSE_DAX_RECLAIM_CHUNK (10)
+
+/*
+ * Dax memory reclaim threshold in percetage of total ranges. When free
+ * number of free ranges drops below this threshold, reclaim can trigger
+ * Default is 20%
+ */
+#define FUSE_DAX_RECLAIM_THRESHOLD (20)
+
+/** Translation information for file offsets to DAX window offsets */
+struct fuse_dax_mapping {
+ /* Pointer to inode where this memory range is mapped */
+ struct inode *inode;
+
+ /* Will connect in fcd->free_ranges to keep track of free memory */
+ struct list_head list;
+
+ /* For interval tree in file/inode */
+ struct interval_tree_node itn;
+
+ /* Will connect in fc->busy_ranges to keep track busy memory */
+ struct list_head busy_list;
+
+ /** Position in DAX window */
+ u64 window_offset;
+
+ /** Length of mapping, in bytes */
+ loff_t length;
+
+ /* Is this mapping read-only or read-write */
+ bool writable;
+
+ /* reference count when the mapping is used by dax iomap. */
+ refcount_t refcnt;
+};
+
+/* Per-inode dax map */
+struct fuse_inode_dax {
+ /* Semaphore to protect modifications to the dmap tree */
+ struct rw_semaphore sem;
+
+ /* Sorted rb tree of struct fuse_dax_mapping elements */
+ struct rb_root_cached tree;
+ unsigned long nr;
+};
+
+struct fuse_conn_dax {
+ /* DAX device */
+ struct dax_device *dev;
+
+ /* Lock protecting accessess to members of this structure */
+ spinlock_t lock;
+
+ /* List of memory ranges which are busy */
+ unsigned long nr_busy_ranges;
+ struct list_head busy_ranges;
+
+ /* Worker to free up memory ranges */
+ struct delayed_work free_work;
+
+ /* Wait queue for a dax range to become free */
+ wait_queue_head_t range_waitq;
+
+ /* DAX Window Free Ranges */
+ long nr_free_ranges;
+ struct list_head free_ranges;
+
+ unsigned long nr_ranges;
+};
+
+static inline struct fuse_dax_mapping *
+node_to_dmap(struct interval_tree_node *node)
+{
+ if (!node)
+ return NULL;
+
+ return container_of(node, struct fuse_dax_mapping, itn);
+}
+
+static struct fuse_dax_mapping *
+alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode);
+
+static void
+__kick_dmap_free_worker(struct fuse_conn_dax *fcd, unsigned long delay_ms)
+{
+ unsigned long free_threshold;
+
+ /* If number of free ranges are below threshold, start reclaim */
+ free_threshold = max_t(unsigned long, fcd->nr_ranges * FUSE_DAX_RECLAIM_THRESHOLD / 100,
+ 1);
+ if (fcd->nr_free_ranges < free_threshold)
+ queue_delayed_work(system_long_wq, &fcd->free_work,
+ msecs_to_jiffies(delay_ms));
+}
+
+static void kick_dmap_free_worker(struct fuse_conn_dax *fcd,
+ unsigned long delay_ms)
+{
+ spin_lock(&fcd->lock);
+ __kick_dmap_free_worker(fcd, delay_ms);
+ spin_unlock(&fcd->lock);
+}
+
+static struct fuse_dax_mapping *alloc_dax_mapping(struct fuse_conn_dax *fcd)
+{
+ struct fuse_dax_mapping *dmap;
+
+ spin_lock(&fcd->lock);
+ dmap = list_first_entry_or_null(&fcd->free_ranges,
+ struct fuse_dax_mapping, list);
+ if (dmap) {
+ list_del_init(&dmap->list);
+ WARN_ON(fcd->nr_free_ranges <= 0);
+ fcd->nr_free_ranges--;
+ }
+ spin_unlock(&fcd->lock);
+
+ kick_dmap_free_worker(fcd, 0);
+ return dmap;
+}
+
+/* This assumes fcd->lock is held */
+static void __dmap_remove_busy_list(struct fuse_conn_dax *fcd,
+ struct fuse_dax_mapping *dmap)
+{
+ list_del_init(&dmap->busy_list);
+ WARN_ON(fcd->nr_busy_ranges == 0);
+ fcd->nr_busy_ranges--;
+}
+
+static void dmap_remove_busy_list(struct fuse_conn_dax *fcd,
+ struct fuse_dax_mapping *dmap)
+{
+ spin_lock(&fcd->lock);
+ __dmap_remove_busy_list(fcd, dmap);
+ spin_unlock(&fcd->lock);
+}
+
+/* This assumes fcd->lock is held */
+static void __dmap_add_to_free_pool(struct fuse_conn_dax *fcd,
+ struct fuse_dax_mapping *dmap)
+{
+ list_add_tail(&dmap->list, &fcd->free_ranges);
+ fcd->nr_free_ranges++;
+ wake_up(&fcd->range_waitq);
+}
+
+static void dmap_add_to_free_pool(struct fuse_conn_dax *fcd,
+ struct fuse_dax_mapping *dmap)
+{
+ /* Return fuse_dax_mapping to free list */
+ spin_lock(&fcd->lock);
+ __dmap_add_to_free_pool(fcd, dmap);
+ spin_unlock(&fcd->lock);
+}
+
+static int fuse_setup_one_mapping(struct inode *inode, unsigned long start_idx,
+ struct fuse_dax_mapping *dmap, bool writable,
+ bool upgrade)
+{
+ struct fuse_mount *fm = get_fuse_mount(inode);
+ struct fuse_conn_dax *fcd = fm->fc->dax;
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_setupmapping_in inarg;
+ loff_t offset = start_idx << FUSE_DAX_SHIFT;
+ FUSE_ARGS(args);
+ ssize_t err;
+
+ WARN_ON(fcd->nr_free_ranges < 0);
+
+ /* Ask fuse daemon to setup mapping */
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.foffset = offset;
+ inarg.fh = -1;
+ inarg.moffset = dmap->window_offset;
+ inarg.len = FUSE_DAX_SZ;
+ inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ;
+ if (writable)
+ inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
+ args.opcode = FUSE_SETUPMAPPING;
+ args.nodeid = fi->nodeid;
+ args.in_numargs = 1;
+ args.in_args[0].size = sizeof(inarg);
+ args.in_args[0].value = &inarg;
+ err = fuse_simple_request(fm, &args);
+ if (err < 0)
+ return err;
+ dmap->writable = writable;
+ if (!upgrade) {
+ /*
+ * We don't take a refernce on inode. inode is valid right now
+ * and when inode is going away, cleanup logic should first
+ * cleanup dmap entries.
+ */
+ dmap->inode = inode;
+ dmap->itn.start = dmap->itn.last = start_idx;
+ /* Protected by fi->dax->sem */
+ interval_tree_insert(&dmap->itn, &fi->dax->tree);
+ fi->dax->nr++;
+ spin_lock(&fcd->lock);
+ list_add_tail(&dmap->busy_list, &fcd->busy_ranges);
+ fcd->nr_busy_ranges++;
+ spin_unlock(&fcd->lock);
+ }
+ return 0;
+}
+
+static int fuse_send_removemapping(struct inode *inode,
+ struct fuse_removemapping_in *inargp,
+ struct fuse_removemapping_one *remove_one)
+{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
+ FUSE_ARGS(args);
+
+ args.opcode = FUSE_REMOVEMAPPING;
+ args.nodeid = fi->nodeid;
+ args.in_numargs = 2;
+ args.in_args[0].size = sizeof(*inargp);
+ args.in_args[0].value = inargp;
+ args.in_args[1].size = inargp->count * sizeof(*remove_one);
+ args.in_args[1].value = remove_one;
+ return fuse_simple_request(fm, &args);
+}
+
+static int dmap_removemapping_list(struct inode *inode, unsigned int num,
+ struct list_head *to_remove)
+{
+ struct fuse_removemapping_one *remove_one, *ptr;
+ struct fuse_removemapping_in inarg;
+ struct fuse_dax_mapping *dmap;
+ int ret, i = 0, nr_alloc;
+
+ nr_alloc = min_t(unsigned int, num, FUSE_REMOVEMAPPING_MAX_ENTRY);
+ remove_one = kmalloc_array(nr_alloc, sizeof(*remove_one), GFP_NOFS);
+ if (!remove_one)
+ return -ENOMEM;
+
+ ptr = remove_one;
+ list_for_each_entry(dmap, to_remove, list) {
+ ptr->moffset = dmap->window_offset;
+ ptr->len = dmap->length;
+ ptr++;
+ i++;
+ num--;
+ if (i >= nr_alloc || num == 0) {
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.count = i;
+ ret = fuse_send_removemapping(inode, &inarg,
+ remove_one);
+ if (ret)
+ goto out;
+ ptr = remove_one;
+ i = 0;
+ }
+ }
+out:
+ kfree(remove_one);
+ return ret;
+}
+
+/*
+ * Cleanup dmap entry and add back to free list. This should be called with
+ * fcd->lock held.
+ */
+static void dmap_reinit_add_to_free_pool(struct fuse_conn_dax *fcd,
+ struct fuse_dax_mapping *dmap)
+{
+ pr_debug("fuse: freeing memory range start_idx=0x%lx end_idx=0x%lx window_offset=0x%llx length=0x%llx\n",
+ dmap->itn.start, dmap->itn.last, dmap->window_offset,
+ dmap->length);
+ __dmap_remove_busy_list(fcd, dmap);
+ dmap->inode = NULL;
+ dmap->itn.start = dmap->itn.last = 0;
+ __dmap_add_to_free_pool(fcd, dmap);
+}
+
+/*
+ * Free inode dmap entries whose range falls inside [start, end].
+ * Does not take any locks. At this point of time it should only be
+ * called from evict_inode() path where we know all dmap entries can be
+ * reclaimed.
+ */
+static void inode_reclaim_dmap_range(struct fuse_conn_dax *fcd,
+ struct inode *inode,
+ loff_t start, loff_t end)
+{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_dax_mapping *dmap, *n;
+ int err, num = 0;
+ LIST_HEAD(to_remove);
+ unsigned long start_idx = start >> FUSE_DAX_SHIFT;
+ unsigned long end_idx = end >> FUSE_DAX_SHIFT;
+ struct interval_tree_node *node;
+
+ while (1) {
+ node = interval_tree_iter_first(&fi->dax->tree, start_idx,
+ end_idx);
+ if (!node)
+ break;
+ dmap = node_to_dmap(node);
+ /* inode is going away. There should not be any users of dmap */
+ WARN_ON(refcount_read(&dmap->refcnt) > 1);
+ interval_tree_remove(&dmap->itn, &fi->dax->tree);
+ num++;
+ list_add(&dmap->list, &to_remove);
+ }
+
+ /* Nothing to remove */
+ if (list_empty(&to_remove))
+ return;
+
+ WARN_ON(fi->dax->nr < num);
+ fi->dax->nr -= num;
+ err = dmap_removemapping_list(inode, num, &to_remove);
+ if (err && err != -ENOTCONN) {
+ pr_warn("Failed to removemappings. start=0x%llx end=0x%llx\n",
+ start, end);
+ }
+ spin_lock(&fcd->lock);
+ list_for_each_entry_safe(dmap, n, &to_remove, list) {
+ list_del_init(&dmap->list);
+ dmap_reinit_add_to_free_pool(fcd, dmap);
+ }
+ spin_unlock(&fcd->lock);
+}
+
+static int dmap_removemapping_one(struct inode *inode,
+ struct fuse_dax_mapping *dmap)
+{
+ struct fuse_removemapping_one forget_one;
+ struct fuse_removemapping_in inarg;
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.count = 1;
+ memset(&forget_one, 0, sizeof(forget_one));
+ forget_one.moffset = dmap->window_offset;
+ forget_one.len = dmap->length;
+
+ return fuse_send_removemapping(inode, &inarg, &forget_one);
+}
+
+/*
+ * It is called from evict_inode() and by that time inode is going away. So
+ * this function does not take any locks like fi->dax->sem for traversing
+ * that fuse inode interval tree. If that lock is taken then lock validator
+ * complains of deadlock situation w.r.t fs_reclaim lock.
+ */
+void fuse_dax_inode_cleanup(struct inode *inode)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ /*
+ * fuse_evict_inode() has already called truncate_inode_pages_final()
+ * before we arrive here. So we should not have to worry about any
+ * pages/exception entries still associated with inode.
+ */
+ inode_reclaim_dmap_range(fc->dax, inode, 0, -1);
+ WARN_ON(fi->dax->nr);
+}
+
+static void fuse_fill_iomap_hole(struct iomap *iomap, loff_t length)
+{
+ iomap->addr = IOMAP_NULL_ADDR;
+ iomap->length = length;
+ iomap->type = IOMAP_HOLE;
+}
+
+static void fuse_fill_iomap(struct inode *inode, loff_t pos, loff_t length,
+ struct iomap *iomap, struct fuse_dax_mapping *dmap,
+ unsigned int flags)
+{
+ loff_t offset, len;
+ loff_t i_size = i_size_read(inode);
+
+ offset = pos - (dmap->itn.start << FUSE_DAX_SHIFT);
+ len = min(length, dmap->length - offset);
+
+ /* If length is beyond end of file, truncate further */
+ if (pos + len > i_size)
+ len = i_size - pos;
+
+ if (len > 0) {
+ iomap->addr = dmap->window_offset + offset;
+ iomap->length = len;
+ if (flags & IOMAP_FAULT)
+ iomap->length = ALIGN(len, PAGE_SIZE);
+ iomap->type = IOMAP_MAPPED;
+ /*
+ * increace refcnt so that reclaim code knows this dmap is in
+ * use. This assumes fi->dax->sem mutex is held either
+ * shared/exclusive.
+ */
+ refcount_inc(&dmap->refcnt);
+
+ /* iomap->private should be NULL */
+ WARN_ON_ONCE(iomap->private);
+ iomap->private = dmap;
+ } else {
+ /* Mapping beyond end of file is hole */
+ fuse_fill_iomap_hole(iomap, length);
+ }
+}
+
+static int fuse_setup_new_dax_mapping(struct inode *inode, loff_t pos,
+ loff_t length, unsigned int flags,
+ struct iomap *iomap)
+{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_conn_dax *fcd = fc->dax;
+ struct fuse_dax_mapping *dmap, *alloc_dmap = NULL;
+ int ret;
+ bool writable = flags & IOMAP_WRITE;
+ unsigned long start_idx = pos >> FUSE_DAX_SHIFT;
+ struct interval_tree_node *node;
+
+ /*
+ * Can't do inline reclaim in fault path. We call
+ * dax_layout_busy_page() before we free a range. And
+ * fuse_wait_dax_page() drops fi->i_mmap_sem lock and requires it.
+ * In fault path we enter with fi->i_mmap_sem held and can't drop
+ * it. Also in fault path we hold fi->i_mmap_sem shared and not
+ * exclusive, so that creates further issues with fuse_wait_dax_page().
+ * Hence return -EAGAIN and fuse_dax_fault() will wait for a memory
+ * range to become free and retry.
+ */
+ if (flags & IOMAP_FAULT) {
+ alloc_dmap = alloc_dax_mapping(fcd);
+ if (!alloc_dmap)
+ return -EAGAIN;
+ } else {
+ alloc_dmap = alloc_dax_mapping_reclaim(fcd, inode);
+ if (IS_ERR(alloc_dmap))
+ return PTR_ERR(alloc_dmap);
+ }
+
+ /* If we are here, we should have memory allocated */
+ if (WARN_ON(!alloc_dmap))
+ return -EIO;
+
+ /*
+ * Take write lock so that only one caller can try to setup mapping
+ * and other waits.
+ */
+ down_write(&fi->dax->sem);
+ /*
+ * We dropped lock. Check again if somebody else setup
+ * mapping already.
+ */
+ node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx);
+ if (node) {
+ dmap = node_to_dmap(node);
+ fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
+ dmap_add_to_free_pool(fcd, alloc_dmap);
+ up_write(&fi->dax->sem);
+ return 0;
+ }
+
+ /* Setup one mapping */
+ ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, alloc_dmap,
+ writable, false);
+ if (ret < 0) {
+ dmap_add_to_free_pool(fcd, alloc_dmap);
+ up_write(&fi->dax->sem);
+ return ret;
+ }
+ fuse_fill_iomap(inode, pos, length, iomap, alloc_dmap, flags);
+ up_write(&fi->dax->sem);
+ return 0;
+}
+
+static int fuse_upgrade_dax_mapping(struct inode *inode, loff_t pos,
+ loff_t length, unsigned int flags,
+ struct iomap *iomap)
+{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_dax_mapping *dmap;
+ int ret;
+ unsigned long idx = pos >> FUSE_DAX_SHIFT;
+ struct interval_tree_node *node;
+
+ /*
+ * Take exclusive lock so that only one caller can try to setup
+ * mapping and others wait.
+ */
+ down_write(&fi->dax->sem);
+ node = interval_tree_iter_first(&fi->dax->tree, idx, idx);
+
+ /* We are holding either inode lock or i_mmap_sem, and that should
+ * ensure that dmap can't be truncated. We are holding a reference
+ * on dmap and that should make sure it can't be reclaimed. So dmap
+ * should still be there in tree despite the fact we dropped and
+ * re-acquired the fi->dax->sem lock.
+ */
+ ret = -EIO;
+ if (WARN_ON(!node))
+ goto out_err;
+
+ dmap = node_to_dmap(node);
+
+ /* We took an extra reference on dmap to make sure its not reclaimd.
+ * Now we hold fi->dax->sem lock and that reference is not needed
+ * anymore. Drop it.
+ */
+ if (refcount_dec_and_test(&dmap->refcnt)) {
+ /* refcount should not hit 0. This object only goes
+ * away when fuse connection goes away
+ */
+ WARN_ON_ONCE(1);
+ }
+
+ /* Maybe another thread already upgraded mapping while we were not
+ * holding lock.
+ */
+ if (dmap->writable) {
+ ret = 0;
+ goto out_fill_iomap;
+ }
+
+ ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, dmap, true,
+ true);
+ if (ret < 0)
+ goto out_err;
+out_fill_iomap:
+ fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
+out_err:
+ up_write(&fi->dax->sem);
+ return ret;
+}
+
+/* This is just for DAX and the mapping is ephemeral, do not use it for other
+ * purposes since there is no block device with a permanent mapping.
+ */
+static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
+ unsigned int flags, struct iomap *iomap,
+ struct iomap *srcmap)
+{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_dax_mapping *dmap;
+ bool writable = flags & IOMAP_WRITE;
+ unsigned long start_idx = pos >> FUSE_DAX_SHIFT;
+ struct interval_tree_node *node;
+
+ /* We don't support FIEMAP */
+ if (WARN_ON(flags & IOMAP_REPORT))
+ return -EIO;
+
+ iomap->offset = pos;
+ iomap->flags = 0;
+ iomap->bdev = NULL;
+ iomap->dax_dev = fc->dax->dev;
+
+ /*
+ * Both read/write and mmap path can race here. So we need something
+ * to make sure if we are setting up mapping, then other path waits
+ *
+ * For now, use a semaphore for this. It probably needs to be
+ * optimized later.
+ */
+ down_read(&fi->dax->sem);
+ node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx);
+ if (node) {
+ dmap = node_to_dmap(node);
+ if (writable && !dmap->writable) {
+ /* Upgrade read-only mapping to read-write. This will
+ * require exclusive fi->dax->sem lock as we don't want
+ * two threads to be trying to this simultaneously
+ * for same dmap. So drop shared lock and acquire
+ * exclusive lock.
+ *
+ * Before dropping fi->dax->sem lock, take reference
+ * on dmap so that its not freed by range reclaim.
+ */
+ refcount_inc(&dmap->refcnt);
+ up_read(&fi->dax->sem);
+ pr_debug("%s: Upgrading mapping at offset 0x%llx length 0x%llx\n",
+ __func__, pos, length);
+ return fuse_upgrade_dax_mapping(inode, pos, length,
+ flags, iomap);
+ } else {
+ fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
+ up_read(&fi->dax->sem);
+ return 0;
+ }
+ } else {
+ up_read(&fi->dax->sem);
+ pr_debug("%s: no mapping at offset 0x%llx length 0x%llx\n",
+ __func__, pos, length);
+ if (pos >= i_size_read(inode))
+ goto iomap_hole;
+
+ return fuse_setup_new_dax_mapping(inode, pos, length, flags,
+ iomap);
+ }
+
+ /*
+ * If read beyond end of file happnes, fs code seems to return
+ * it as hole
+ */
+iomap_hole:
+ fuse_fill_iomap_hole(iomap, length);
+ pr_debug("%s returning hole mapping. pos=0x%llx length_asked=0x%llx length_returned=0x%llx\n",
+ __func__, pos, length, iomap->length);
+ return 0;
+}
+
+static int fuse_iomap_end(struct inode *inode, loff_t pos, loff_t length,
+ ssize_t written, unsigned int flags,
+ struct iomap *iomap)
+{
+ struct fuse_dax_mapping *dmap = iomap->private;
+
+ if (dmap) {
+ if (refcount_dec_and_test(&dmap->refcnt)) {
+ /* refcount should not hit 0. This object only goes
+ * away when fuse connection goes away
+ */
+ WARN_ON_ONCE(1);
+ }
+ }
+
+ /* DAX writes beyond end-of-file aren't handled using iomap, so the
+ * file size is unchanged and there is nothing to do here.
+ */
+ return 0;
+}
+
+static const struct iomap_ops fuse_iomap_ops = {
+ .iomap_begin = fuse_iomap_begin,
+ .iomap_end = fuse_iomap_end,
+};
+
+static void fuse_wait_dax_page(struct inode *inode)
+{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ up_write(&fi->i_mmap_sem);
+ schedule();
+ down_write(&fi->i_mmap_sem);
+}
+
+/* Should be called with fi->i_mmap_sem lock held exclusively */
+static int __fuse_dax_break_layouts(struct inode *inode, bool *retry,
+ loff_t start, loff_t end)
+{
+ struct page *page;
+
+ page = dax_layout_busy_page_range(inode->i_mapping, start, end);
+ if (!page)
+ return 0;
+
+ *retry = true;
+ return ___wait_var_event(&page->_refcount,
+ atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE,
+ 0, 0, fuse_wait_dax_page(inode));
+}
+
+/* dmap_end == 0 leads to unmapping of whole file */
+int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start,
+ u64 dmap_end)
+{
+ bool retry;
+ int ret;
+
+ do {
+ retry = false;
+ ret = __fuse_dax_break_layouts(inode, &retry, dmap_start,
+ dmap_end);
+ } while (ret == 0 && retry);
+
+ return ret;
+}
+
+ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ ssize_t ret;
+
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!inode_trylock_shared(inode))
+ return -EAGAIN;
+ } else {
+ inode_lock_shared(inode);
+ }
+
+ ret = dax_iomap_rw(iocb, to, &fuse_iomap_ops);
+ inode_unlock_shared(inode);
+
+ /* TODO file_accessed(iocb->f_filp) */
+ return ret;
+}
+
+static bool file_extending_write(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+
+ return (iov_iter_rw(from) == WRITE &&
+ ((iocb->ki_pos) >= i_size_read(inode) ||
+ (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode))));
+}
+
+static ssize_t fuse_dax_direct_write(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
+ ssize_t ret;
+
+ ret = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE);
+ if (ret < 0)
+ return ret;
+
+ fuse_invalidate_attr(inode);
+ fuse_write_update_size(inode, iocb->ki_pos);
+ return ret;
+}
+
+ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ ssize_t ret;
+
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!inode_trylock(inode))
+ return -EAGAIN;
+ } else {
+ inode_lock(inode);
+ }
+
+ ret = generic_write_checks(iocb, from);
+ if (ret <= 0)
+ goto out;
+
+ ret = file_remove_privs(iocb->ki_filp);
+ if (ret)
+ goto out;
+ /* TODO file_update_time() but we don't want metadata I/O */
+
+ /* Do not use dax for file extending writes as write and on
+ * disk i_size increase are not atomic otherwise.
+ */
+ if (file_extending_write(iocb, from))
+ ret = fuse_dax_direct_write(iocb, from);
+ else
+ ret = dax_iomap_rw(iocb, from, &fuse_iomap_ops);
+
+out:
+ inode_unlock(inode);
+
+ if (ret > 0)
+ ret = generic_write_sync(iocb, ret);
+ return ret;
+}
+
+static int fuse_dax_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+
+ struct inode *inode = mapping->host;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ return dax_writeback_mapping_range(mapping, fc->dax->dev, wbc);
+}
+
+static vm_fault_t __fuse_dax_fault(struct vm_fault *vmf,
+ enum page_entry_size pe_size, bool write)
+{
+ vm_fault_t ret;
+ struct inode *inode = file_inode(vmf->vma->vm_file);
+ struct super_block *sb = inode->i_sb;
+ pfn_t pfn;
+ int error = 0;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_conn_dax *fcd = fc->dax;
+ bool retry = false;
+
+ if (write)
+ sb_start_pagefault(sb);
+retry:
+ if (retry && !(fcd->nr_free_ranges > 0))
+ wait_event(fcd->range_waitq, (fcd->nr_free_ranges > 0));
+
+ /*
+ * We need to serialize against not only truncate but also against
+ * fuse dax memory range reclaim. While a range is being reclaimed,
+ * we do not want any read/write/mmap to make progress and try
+ * to populate page cache or access memory we are trying to free.
+ */
+ down_read(&get_fuse_inode(inode)->i_mmap_sem);
+ ret = dax_iomap_fault(vmf, pe_size, &pfn, &error, &fuse_iomap_ops);
+ if ((ret & VM_FAULT_ERROR) && error == -EAGAIN) {
+ error = 0;
+ retry = true;
+ up_read(&get_fuse_inode(inode)->i_mmap_sem);
+ goto retry;
+ }
+
+ if (ret & VM_FAULT_NEEDDSYNC)
+ ret = dax_finish_sync_fault(vmf, pe_size, pfn);
+ up_read(&get_fuse_inode(inode)->i_mmap_sem);
+
+ if (write)
+ sb_end_pagefault(sb);
+
+ return ret;
+}
+
+static vm_fault_t fuse_dax_fault(struct vm_fault *vmf)
+{
+ return __fuse_dax_fault(vmf, PE_SIZE_PTE,
+ vmf->flags & FAULT_FLAG_WRITE);
+}
+
+static vm_fault_t fuse_dax_huge_fault(struct vm_fault *vmf,
+ enum page_entry_size pe_size)
+{
+ return __fuse_dax_fault(vmf, pe_size, vmf->flags & FAULT_FLAG_WRITE);
+}
+
+static vm_fault_t fuse_dax_page_mkwrite(struct vm_fault *vmf)
+{
+ return __fuse_dax_fault(vmf, PE_SIZE_PTE, true);
+}
+
+static vm_fault_t fuse_dax_pfn_mkwrite(struct vm_fault *vmf)
+{
+ return __fuse_dax_fault(vmf, PE_SIZE_PTE, true);
+}
+
+static const struct vm_operations_struct fuse_dax_vm_ops = {
+ .fault = fuse_dax_fault,
+ .huge_fault = fuse_dax_huge_fault,
+ .page_mkwrite = fuse_dax_page_mkwrite,
+ .pfn_mkwrite = fuse_dax_pfn_mkwrite,
+};
+
+int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ file_accessed(file);
+ vma->vm_ops = &fuse_dax_vm_ops;
+ vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
+ return 0;
+}
+
+static int dmap_writeback_invalidate(struct inode *inode,
+ struct fuse_dax_mapping *dmap)
+{
+ int ret;
+ loff_t start_pos = dmap->itn.start << FUSE_DAX_SHIFT;
+ loff_t end_pos = (start_pos + FUSE_DAX_SZ - 1);
+
+ ret = filemap_fdatawrite_range(inode->i_mapping, start_pos, end_pos);
+ if (ret) {
+ pr_debug("fuse: filemap_fdatawrite_range() failed. err=%d start_pos=0x%llx, end_pos=0x%llx\n",
+ ret, start_pos, end_pos);
+ return ret;
+ }
+
+ ret = invalidate_inode_pages2_range(inode->i_mapping,
+ start_pos >> PAGE_SHIFT,
+ end_pos >> PAGE_SHIFT);
+ if (ret)
+ pr_debug("fuse: invalidate_inode_pages2_range() failed err=%d\n",
+ ret);
+
+ return ret;
+}
+
+static int reclaim_one_dmap_locked(struct inode *inode,
+ struct fuse_dax_mapping *dmap)
+{
+ int ret;
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ /*
+ * igrab() was done to make sure inode won't go under us, and this
+ * further avoids the race with evict().
+ */
+ ret = dmap_writeback_invalidate(inode, dmap);
+ if (ret)
+ return ret;
+
+ /* Remove dax mapping from inode interval tree now */
+ interval_tree_remove(&dmap->itn, &fi->dax->tree);
+ fi->dax->nr--;
+
+ /* It is possible that umount/shutdown has killed the fuse connection
+ * and worker thread is trying to reclaim memory in parallel. Don't
+ * warn in that case.
+ */
+ ret = dmap_removemapping_one(inode, dmap);
+ if (ret && ret != -ENOTCONN) {
+ pr_warn("Failed to remove mapping. offset=0x%llx len=0x%llx ret=%d\n",
+ dmap->window_offset, dmap->length, ret);
+ }
+ return 0;
+}
+
+/* Find first mapped dmap for an inode and return file offset. Caller needs
+ * to hold fi->dax->sem lock either shared or exclusive.
+ */
+static struct fuse_dax_mapping *inode_lookup_first_dmap(struct inode *inode)
+{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_dax_mapping *dmap;
+ struct interval_tree_node *node;
+
+ for (node = interval_tree_iter_first(&fi->dax->tree, 0, -1); node;
+ node = interval_tree_iter_next(node, 0, -1)) {
+ dmap = node_to_dmap(node);
+ /* still in use. */
+ if (refcount_read(&dmap->refcnt) > 1)
+ continue;
+
+ return dmap;
+ }
+
+ return NULL;
+}
+
+/*
+ * Find first mapping in the tree and free it and return it. Do not add
+ * it back to free pool.
+ */
+static struct fuse_dax_mapping *
+inode_inline_reclaim_one_dmap(struct fuse_conn_dax *fcd, struct inode *inode,
+ bool *retry)
+{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_dax_mapping *dmap;
+ u64 dmap_start, dmap_end;
+ unsigned long start_idx;
+ int ret;
+ struct interval_tree_node *node;
+
+ down_write(&fi->i_mmap_sem);
+
+ /* Lookup a dmap and corresponding file offset to reclaim. */
+ down_read(&fi->dax->sem);
+ dmap = inode_lookup_first_dmap(inode);
+ if (dmap) {
+ start_idx = dmap->itn.start;
+ dmap_start = start_idx << FUSE_DAX_SHIFT;
+ dmap_end = dmap_start + FUSE_DAX_SZ - 1;
+ }
+ up_read(&fi->dax->sem);
+
+ if (!dmap)
+ goto out_mmap_sem;
+ /*
+ * Make sure there are no references to inode pages using
+ * get_user_pages()
+ */
+ ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end);
+ if (ret) {
+ pr_debug("fuse: fuse_dax_break_layouts() failed. err=%d\n",
+ ret);
+ dmap = ERR_PTR(ret);
+ goto out_mmap_sem;
+ }
+
+ down_write(&fi->dax->sem);
+ node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx);
+ /* Range already got reclaimed by somebody else */
+ if (!node) {
+ if (retry)
+ *retry = true;
+ goto out_write_dmap_sem;
+ }
+
+ dmap = node_to_dmap(node);
+ /* still in use. */
+ if (refcount_read(&dmap->refcnt) > 1) {
+ dmap = NULL;
+ if (retry)
+ *retry = true;
+ goto out_write_dmap_sem;
+ }
+
+ ret = reclaim_one_dmap_locked(inode, dmap);
+ if (ret < 0) {
+ dmap = ERR_PTR(ret);
+ goto out_write_dmap_sem;
+ }
+
+ /* Clean up dmap. Do not add back to free list */
+ dmap_remove_busy_list(fcd, dmap);
+ dmap->inode = NULL;
+ dmap->itn.start = dmap->itn.last = 0;
+
+ pr_debug("fuse: %s: inline reclaimed memory range. inode=%p, window_offset=0x%llx, length=0x%llx\n",
+ __func__, inode, dmap->window_offset, dmap->length);
+
+out_write_dmap_sem:
+ up_write(&fi->dax->sem);
+out_mmap_sem:
+ up_write(&fi->i_mmap_sem);
+ return dmap;
+}
+
+static struct fuse_dax_mapping *
+alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode)
+{
+ struct fuse_dax_mapping *dmap;
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ while (1) {
+ bool retry = false;
+
+ dmap = alloc_dax_mapping(fcd);
+ if (dmap)
+ return dmap;
+
+ dmap = inode_inline_reclaim_one_dmap(fcd, inode, &retry);
+ /*
+ * Either we got a mapping or it is an error, return in both
+ * the cases.
+ */
+ if (dmap)
+ return dmap;
+
+ /* If we could not reclaim a mapping because it
+ * had a reference or some other temporary failure,
+ * Try again. We want to give up inline reclaim only
+ * if there is no range assigned to this node. Otherwise
+ * if a deadlock is possible if we sleep with fi->i_mmap_sem
+ * held and worker to free memory can't make progress due
+ * to unavailability of fi->i_mmap_sem lock. So sleep
+ * only if fi->dax->nr=0
+ */
+ if (retry)
+ continue;
+ /*
+ * There are no mappings which can be reclaimed. Wait for one.
+ * We are not holding fi->dax->sem. So it is possible
+ * that range gets added now. But as we are not holding
+ * fi->i_mmap_sem, worker should still be able to free up
+ * a range and wake us up.
+ */
+ if (!fi->dax->nr && !(fcd->nr_free_ranges > 0)) {
+ if (wait_event_killable_exclusive(fcd->range_waitq,
+ (fcd->nr_free_ranges > 0))) {
+ return ERR_PTR(-EINTR);
+ }
+ }
+ }
+}
+
+static int lookup_and_reclaim_dmap_locked(struct fuse_conn_dax *fcd,
+ struct inode *inode,
+ unsigned long start_idx)
+{
+ int ret;
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_dax_mapping *dmap;
+ struct interval_tree_node *node;
+
+ /* Find fuse dax mapping at file offset inode. */
+ node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx);
+
+ /* Range already got cleaned up by somebody else */
+ if (!node)
+ return 0;
+ dmap = node_to_dmap(node);
+
+ /* still in use. */
+ if (refcount_read(&dmap->refcnt) > 1)
+ return 0;
+
+ ret = reclaim_one_dmap_locked(inode, dmap);
+ if (ret < 0)
+ return ret;
+
+ /* Cleanup dmap entry and add back to free list */
+ spin_lock(&fcd->lock);
+ dmap_reinit_add_to_free_pool(fcd, dmap);
+ spin_unlock(&fcd->lock);
+ return ret;
+}
+
+/*
+ * Free a range of memory.
+ * Locking:
+ * 1. Take fi->i_mmap_sem to block dax faults.
+ * 2. Take fi->dax->sem to protect interval tree and also to make sure
+ * read/write can not reuse a dmap which we might be freeing.
+ */
+static int lookup_and_reclaim_dmap(struct fuse_conn_dax *fcd,
+ struct inode *inode,
+ unsigned long start_idx,
+ unsigned long end_idx)
+{
+ int ret;
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ loff_t dmap_start = start_idx << FUSE_DAX_SHIFT;
+ loff_t dmap_end = (dmap_start + FUSE_DAX_SZ) - 1;
+
+ down_write(&fi->i_mmap_sem);
+ ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end);
+ if (ret) {
+ pr_debug("virtio_fs: fuse_dax_break_layouts() failed. err=%d\n",
+ ret);
+ goto out_mmap_sem;
+ }
+
+ down_write(&fi->dax->sem);
+ ret = lookup_and_reclaim_dmap_locked(fcd, inode, start_idx);
+ up_write(&fi->dax->sem);
+out_mmap_sem:
+ up_write(&fi->i_mmap_sem);
+ return ret;
+}
+
+static int try_to_free_dmap_chunks(struct fuse_conn_dax *fcd,
+ unsigned long nr_to_free)
+{
+ struct fuse_dax_mapping *dmap, *pos, *temp;
+ int ret, nr_freed = 0;
+ unsigned long start_idx = 0, end_idx = 0;
+ struct inode *inode = NULL;
+
+ /* Pick first busy range and free it for now*/
+ while (1) {
+ if (nr_freed >= nr_to_free)
+ break;
+
+ dmap = NULL;
+ spin_lock(&fcd->lock);
+
+ if (!fcd->nr_busy_ranges) {
+ spin_unlock(&fcd->lock);
+ return 0;
+ }
+
+ list_for_each_entry_safe(pos, temp, &fcd->busy_ranges,
+ busy_list) {
+ /* skip this range if it's in use. */
+ if (refcount_read(&pos->refcnt) > 1)
+ continue;
+
+ inode = igrab(pos->inode);
+ /*
+ * This inode is going away. That will free
+ * up all the ranges anyway, continue to
+ * next range.
+ */
+ if (!inode)
+ continue;
+ /*
+ * Take this element off list and add it tail. If
+ * this element can't be freed, it will help with
+ * selecting new element in next iteration of loop.
+ */
+ dmap = pos;
+ list_move_tail(&dmap->busy_list, &fcd->busy_ranges);
+ start_idx = end_idx = dmap->itn.start;
+ break;
+ }
+ spin_unlock(&fcd->lock);
+ if (!dmap)
+ return 0;
+
+ ret = lookup_and_reclaim_dmap(fcd, inode, start_idx, end_idx);
+ iput(inode);
+ if (ret)
+ return ret;
+ nr_freed++;
+ }
+ return 0;
+}
+
+static void fuse_dax_free_mem_worker(struct work_struct *work)
+{
+ int ret;
+ struct fuse_conn_dax *fcd = container_of(work, struct fuse_conn_dax,
+ free_work.work);
+ ret = try_to_free_dmap_chunks(fcd, FUSE_DAX_RECLAIM_CHUNK);
+ if (ret) {
+ pr_debug("fuse: try_to_free_dmap_chunks() failed with err=%d\n",
+ ret);
+ }
+
+ /* If number of free ranges are still below threhold, requeue */
+ kick_dmap_free_worker(fcd, 1);
+}
+
+static void fuse_free_dax_mem_ranges(struct list_head *mem_list)
+{
+ struct fuse_dax_mapping *range, *temp;
+
+ /* Free All allocated elements */
+ list_for_each_entry_safe(range, temp, mem_list, list) {
+ list_del(&range->list);
+ if (!list_empty(&range->busy_list))
+ list_del(&range->busy_list);
+ kfree(range);
+ }
+}
+
+void fuse_dax_conn_free(struct fuse_conn *fc)
+{
+ if (fc->dax) {
+ fuse_free_dax_mem_ranges(&fc->dax->free_ranges);
+ kfree(fc->dax);
+ }
+}
+
+static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd)
+{
+ long nr_pages, nr_ranges;
+ void *kaddr;
+ pfn_t pfn;
+ struct fuse_dax_mapping *range;
+ int ret, id;
+ size_t dax_size = -1;
+ unsigned long i;
+
+ init_waitqueue_head(&fcd->range_waitq);
+ INIT_LIST_HEAD(&fcd->free_ranges);
+ INIT_LIST_HEAD(&fcd->busy_ranges);
+ INIT_DELAYED_WORK(&fcd->free_work, fuse_dax_free_mem_worker);
+
+ id = dax_read_lock();
+ nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), &kaddr,
+ &pfn);
+ dax_read_unlock(id);
+ if (nr_pages < 0) {
+ pr_debug("dax_direct_access() returned %ld\n", nr_pages);
+ return nr_pages;
+ }
+
+ nr_ranges = nr_pages/FUSE_DAX_PAGES;
+ pr_debug("%s: dax mapped %ld pages. nr_ranges=%ld\n",
+ __func__, nr_pages, nr_ranges);
+
+ for (i = 0; i < nr_ranges; i++) {
+ range = kzalloc(sizeof(struct fuse_dax_mapping), GFP_KERNEL);
+ ret = -ENOMEM;
+ if (!range)
+ goto out_err;
+
+ /* TODO: This offset only works if virtio-fs driver is not
+ * having some memory hidden at the beginning. This needs
+ * better handling
+ */
+ range->window_offset = i * FUSE_DAX_SZ;
+ range->length = FUSE_DAX_SZ;
+ INIT_LIST_HEAD(&range->busy_list);
+ refcount_set(&range->refcnt, 1);
+ list_add_tail(&range->list, &fcd->free_ranges);
+ }
+
+ fcd->nr_free_ranges = nr_ranges;
+ fcd->nr_ranges = nr_ranges;
+ return 0;
+out_err:
+ /* Free All allocated elements */
+ fuse_free_dax_mem_ranges(&fcd->free_ranges);
+ return ret;
+}
+
+int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev)
+{
+ struct fuse_conn_dax *fcd;
+ int err;
+
+ if (!dax_dev)
+ return 0;
+
+ fcd = kzalloc(sizeof(*fcd), GFP_KERNEL);
+ if (!fcd)
+ return -ENOMEM;
+
+ spin_lock_init(&fcd->lock);
+ fcd->dev = dax_dev;
+ err = fuse_dax_mem_range_init(fcd);
+ if (err) {
+ kfree(fcd);
+ return err;
+ }
+
+ fc->dax = fcd;
+ return 0;
+}
+
+bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi)
+{
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+
+ fi->dax = NULL;
+ if (fc->dax) {
+ fi->dax = kzalloc(sizeof(*fi->dax), GFP_KERNEL_ACCOUNT);
+ if (!fi->dax)
+ return false;
+
+ init_rwsem(&fi->dax->sem);
+ fi->dax->tree = RB_ROOT_CACHED;
+ }
+
+ return true;
+}
+
+static const struct address_space_operations fuse_dax_file_aops = {
+ .writepages = fuse_dax_writepages,
+ .direct_IO = noop_direct_IO,
+ .set_page_dirty = noop_set_page_dirty,
+ .invalidatepage = noop_invalidatepage,
+};
+
+void fuse_dax_inode_init(struct inode *inode)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ if (!fc->dax)
+ return;
+
+ inode->i_flags |= S_DAX;
+ inode->i_data.a_ops = &fuse_dax_file_aops;
+}
+
+bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment)
+{
+ if (fc->dax && (map_alignment > FUSE_DAX_SHIFT)) {
+ pr_warn("FUSE: map_alignment %u incompatible with dax mem range size %u\n",
+ map_alignment, FUSE_DAX_SZ);
+ return false;
+ }
+ return true;
+}
+
+void fuse_dax_cancel_work(struct fuse_conn *fc)
+{
+ struct fuse_conn_dax *fcd = fc->dax;
+
+ if (fcd)
+ cancel_delayed_work_sync(&fcd->free_work);
+
+}
+EXPORT_SYMBOL_GPL(fuse_dax_cancel_work);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 02b3c36b3676..588f8d1240aa 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -40,20 +40,21 @@ static struct fuse_dev *fuse_get_dev(struct file *file)
return READ_ONCE(file->private_data);
}
-static void fuse_request_init(struct fuse_req *req)
+static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
{
INIT_LIST_HEAD(&req->list);
INIT_LIST_HEAD(&req->intr_entry);
init_waitqueue_head(&req->waitq);
refcount_set(&req->count, 1);
__set_bit(FR_PENDING, &req->flags);
+ req->fm = fm;
}
-static struct fuse_req *fuse_request_alloc(gfp_t flags)
+static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags)
{
struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags);
if (req)
- fuse_request_init(req);
+ fuse_request_init(fm, req);
return req;
}
@@ -100,10 +101,11 @@ static void fuse_drop_waiting(struct fuse_conn *fc)
}
}
-static void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
+static void fuse_put_request(struct fuse_req *req);
-static struct fuse_req *fuse_get_req(struct fuse_conn *fc, bool for_background)
+static struct fuse_req *fuse_get_req(struct fuse_mount *fm, bool for_background)
{
+ struct fuse_conn *fc = fm->fc;
struct fuse_req *req;
int err;
atomic_inc(&fc->num_waiting);
@@ -125,7 +127,7 @@ static struct fuse_req *fuse_get_req(struct fuse_conn *fc, bool for_background)
if (fc->conn_error)
goto out;
- req = fuse_request_alloc(GFP_KERNEL);
+ req = fuse_request_alloc(fm, GFP_KERNEL);
err = -ENOMEM;
if (!req) {
if (for_background)
@@ -143,7 +145,7 @@ static struct fuse_req *fuse_get_req(struct fuse_conn *fc, bool for_background)
if (unlikely(req->in.h.uid == ((uid_t)-1) ||
req->in.h.gid == ((gid_t)-1))) {
- fuse_put_request(fc, req);
+ fuse_put_request(req);
return ERR_PTR(-EOVERFLOW);
}
return req;
@@ -153,8 +155,10 @@ static struct fuse_req *fuse_get_req(struct fuse_conn *fc, bool for_background)
return ERR_PTR(err);
}
-static void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
+static void fuse_put_request(struct fuse_req *req)
{
+ struct fuse_conn *fc = req->fm->fc;
+
if (refcount_dec_and_test(&req->count)) {
if (test_bit(FR_BACKGROUND, &req->flags)) {
/*
@@ -273,8 +277,10 @@ static void flush_bg_queue(struct fuse_conn *fc)
* the 'end' callback is called if given, else the reference to the
* request is released
*/
-void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req)
+void fuse_request_end(struct fuse_req *req)
{
+ struct fuse_mount *fm = req->fm;
+ struct fuse_conn *fc = fm->fc;
struct fuse_iqueue *fiq = &fc->iq;
if (test_and_set_bit(FR_FINISHED, &req->flags))
@@ -309,9 +315,9 @@ void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req)
wake_up(&fc->blocked_waitq);
}
- if (fc->num_background == fc->congestion_threshold && fc->sb) {
- clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
- clear_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
+ if (fc->num_background == fc->congestion_threshold && fm->sb) {
+ clear_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
+ clear_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
}
fc->num_background--;
fc->active_background--;
@@ -323,14 +329,16 @@ void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req)
}
if (test_bit(FR_ASYNC, &req->flags))
- req->args->end(fc, req->args, req->out.h.error);
+ req->args->end(fm, req->args, req->out.h.error);
put_request:
- fuse_put_request(fc, req);
+ fuse_put_request(req);
}
EXPORT_SYMBOL_GPL(fuse_request_end);
-static int queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
+static int queue_interrupt(struct fuse_req *req)
{
+ struct fuse_iqueue *fiq = &req->fm->fc->iq;
+
spin_lock(&fiq->lock);
/* Check for we've sent request to interrupt this req */
if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) {
@@ -357,8 +365,9 @@ static int queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
return 0;
}
-static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
+static void request_wait_answer(struct fuse_req *req)
{
+ struct fuse_conn *fc = req->fm->fc;
struct fuse_iqueue *fiq = &fc->iq;
int err;
@@ -373,7 +382,7 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
/* matches barrier in fuse_dev_do_read() */
smp_mb__after_atomic();
if (test_bit(FR_SENT, &req->flags))
- queue_interrupt(fiq, req);
+ queue_interrupt(req);
}
if (!test_bit(FR_FORCE, &req->flags)) {
@@ -402,9 +411,9 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags));
}
-static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
+static void __fuse_request_send(struct fuse_req *req)
{
- struct fuse_iqueue *fiq = &fc->iq;
+ struct fuse_iqueue *fiq = &req->fm->fc->iq;
BUG_ON(test_bit(FR_BACKGROUND, &req->flags));
spin_lock(&fiq->lock);
@@ -418,7 +427,7 @@ static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
__fuse_get_request(req);
queue_request_and_unlock(fiq, req);
- request_wait_answer(fc, req);
+ request_wait_answer(req);
/* Pairs with smp_wmb() in fuse_request_end() */
smp_rmb();
}
@@ -457,8 +466,10 @@ static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
}
}
-static void fuse_force_creds(struct fuse_conn *fc, struct fuse_req *req)
+static void fuse_force_creds(struct fuse_req *req)
{
+ struct fuse_conn *fc = req->fm->fc;
+
req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid());
req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid());
req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
@@ -473,23 +484,24 @@ static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args)
__set_bit(FR_ASYNC, &req->flags);
}
-ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
+ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args)
{
+ struct fuse_conn *fc = fm->fc;
struct fuse_req *req;
ssize_t ret;
if (args->force) {
atomic_inc(&fc->num_waiting);
- req = fuse_request_alloc(GFP_KERNEL | __GFP_NOFAIL);
+ req = fuse_request_alloc(fm, GFP_KERNEL | __GFP_NOFAIL);
if (!args->nocreds)
- fuse_force_creds(fc, req);
+ fuse_force_creds(req);
__set_bit(FR_WAITING, &req->flags);
__set_bit(FR_FORCE, &req->flags);
} else {
WARN_ON(args->nocreds);
- req = fuse_get_req(fc, false);
+ req = fuse_get_req(fm, false);
if (IS_ERR(req))
return PTR_ERR(req);
}
@@ -500,20 +512,21 @@ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
if (!args->noreply)
__set_bit(FR_ISREPLY, &req->flags);
- __fuse_request_send(fc, req);
+ __fuse_request_send(req);
ret = req->out.h.error;
if (!ret && args->out_argvar) {
BUG_ON(args->out_numargs == 0);
ret = args->out_args[args->out_numargs - 1].size;
}
- fuse_put_request(fc, req);
+ fuse_put_request(req);
return ret;
}
-static bool fuse_request_queue_background(struct fuse_conn *fc,
- struct fuse_req *req)
+static bool fuse_request_queue_background(struct fuse_req *req)
{
+ struct fuse_mount *fm = req->fm;
+ struct fuse_conn *fc = fm->fc;
bool queued = false;
WARN_ON(!test_bit(FR_BACKGROUND, &req->flags));
@@ -527,9 +540,9 @@ static bool fuse_request_queue_background(struct fuse_conn *fc,
fc->num_background++;
if (fc->num_background == fc->max_background)
fc->blocked = 1;
- if (fc->num_background == fc->congestion_threshold && fc->sb) {
- set_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
- set_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
+ if (fc->num_background == fc->congestion_threshold && fm->sb) {
+ set_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
+ set_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
}
list_add_tail(&req->list, &fc->bg_queue);
flush_bg_queue(fc);
@@ -540,28 +553,28 @@ static bool fuse_request_queue_background(struct fuse_conn *fc,
return queued;
}
-int fuse_simple_background(struct fuse_conn *fc, struct fuse_args *args,
+int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args,
gfp_t gfp_flags)
{
struct fuse_req *req;
if (args->force) {
WARN_ON(!args->nocreds);
- req = fuse_request_alloc(gfp_flags);
+ req = fuse_request_alloc(fm, gfp_flags);
if (!req)
return -ENOMEM;
__set_bit(FR_BACKGROUND, &req->flags);
} else {
WARN_ON(args->nocreds);
- req = fuse_get_req(fc, true);
+ req = fuse_get_req(fm, true);
if (IS_ERR(req))
return PTR_ERR(req);
}
fuse_args_to_req(req, args);
- if (!fuse_request_queue_background(fc, req)) {
- fuse_put_request(fc, req);
+ if (!fuse_request_queue_background(req)) {
+ fuse_put_request(req);
return -ENOTCONN;
}
@@ -569,14 +582,14 @@ int fuse_simple_background(struct fuse_conn *fc, struct fuse_args *args,
}
EXPORT_SYMBOL_GPL(fuse_simple_background);
-static int fuse_simple_notify_reply(struct fuse_conn *fc,
+static int fuse_simple_notify_reply(struct fuse_mount *fm,
struct fuse_args *args, u64 unique)
{
struct fuse_req *req;
- struct fuse_iqueue *fiq = &fc->iq;
+ struct fuse_iqueue *fiq = &fm->fc->iq;
int err = 0;
- req = fuse_get_req(fc, false);
+ req = fuse_get_req(fm, false);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -591,7 +604,7 @@ static int fuse_simple_notify_reply(struct fuse_conn *fc,
} else {
err = -ENODEV;
spin_unlock(&fiq->lock);
- fuse_put_request(fc, req);
+ fuse_put_request(req);
}
return err;
@@ -785,15 +798,16 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
struct page *newpage;
struct pipe_buffer *buf = cs->pipebufs;
+ get_page(oldpage);
err = unlock_request(cs->req);
if (err)
- return err;
+ goto out_put_old;
fuse_copy_finish(cs);
err = pipe_buf_confirm(cs->pipe, buf);
if (err)
- return err;
+ goto out_put_old;
BUG_ON(!cs->nr_segs);
cs->currbuf = buf;
@@ -833,7 +847,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
if (err) {
unlock_page(newpage);
- return err;
+ goto out_put_old;
}
get_page(newpage);
@@ -852,14 +866,19 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
if (err) {
unlock_page(newpage);
put_page(newpage);
- return err;
+ goto out_put_old;
}
unlock_page(oldpage);
+ /* Drop ref for ap->pages[] array */
put_page(oldpage);
cs->len = 0;
- return 0;
+ err = 0;
+out_put_old:
+ /* Drop ref obtained in this function */
+ put_page(oldpage);
+ return err;
out_fallback_unlock:
unlock_page(newpage);
@@ -868,10 +887,10 @@ out_fallback:
cs->offset = buf->offset;
err = lock_request(cs->req);
- if (err)
- return err;
+ if (!err)
+ err = 1;
- return 1;
+ goto out_put_old;
}
static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
@@ -883,14 +902,16 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
if (cs->nr_segs >= cs->pipe->max_usage)
return -EIO;
+ get_page(page);
err = unlock_request(cs->req);
- if (err)
+ if (err) {
+ put_page(page);
return err;
+ }
fuse_copy_finish(cs);
buf = cs->pipebufs;
- get_page(page);
buf->page = page;
buf->offset = offset;
buf->len = count;
@@ -1250,7 +1271,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
/* SETXATTR is special, since it may contain too large data */
if (args->opcode == FUSE_SETXATTR)
req->out.h.error = -E2BIG;
- fuse_request_end(fc, req);
+ fuse_request_end(req);
goto restart;
}
spin_lock(&fpq->lock);
@@ -1284,8 +1305,8 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
/* matches barrier in request_wait_answer() */
smp_mb__after_atomic();
if (test_bit(FR_INTERRUPTED, &req->flags))
- queue_interrupt(fiq, req);
- fuse_put_request(fc, req);
+ queue_interrupt(req);
+ fuse_put_request(req);
return reqsize;
@@ -1293,7 +1314,7 @@ out_end:
if (!test_bit(FR_PRIVATE, &req->flags))
list_del_init(&req->list);
spin_unlock(&fpq->lock);
- fuse_request_end(fc, req);
+ fuse_request_end(req);
return err;
err_unlock:
@@ -1416,11 +1437,8 @@ static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
fuse_copy_finish(cs);
down_read(&fc->killsb);
- err = -ENOENT;
- if (fc->sb) {
- err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
- outarg.off, outarg.len);
- }
+ err = fuse_reverse_inval_inode(fc, outarg.ino,
+ outarg.off, outarg.len);
up_read(&fc->killsb);
return err;
@@ -1466,9 +1484,7 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
buf[outarg.namelen] = 0;
down_read(&fc->killsb);
- err = -ENOENT;
- if (fc->sb)
- err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name);
+ err = fuse_reverse_inval_entry(fc, outarg.parent, 0, &name);
up_read(&fc->killsb);
kfree(buf);
return err;
@@ -1516,10 +1532,7 @@ static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
buf[outarg.namelen] = 0;
down_read(&fc->killsb);
- err = -ENOENT;
- if (fc->sb)
- err = fuse_reverse_inval_entry(fc->sb, outarg.parent,
- outarg.child, &name);
+ err = fuse_reverse_inval_entry(fc, outarg.parent, outarg.child, &name);
up_read(&fc->killsb);
kfree(buf);
return err;
@@ -1561,10 +1574,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
down_read(&fc->killsb);
err = -ENOENT;
- if (!fc->sb)
- goto out_up_killsb;
-
- inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
+ inode = fuse_ilookup(fc, nodeid, NULL);
if (!inode)
goto out_up_killsb;
@@ -1621,7 +1631,7 @@ struct fuse_retrieve_args {
struct fuse_notify_retrieve_in inarg;
};
-static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_args *args,
+static void fuse_retrieve_end(struct fuse_mount *fm, struct fuse_args *args,
int error)
{
struct fuse_retrieve_args *ra =
@@ -1631,7 +1641,7 @@ static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_args *args,
kfree(ra);
}
-static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
+static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
struct fuse_notify_retrieve_out *outarg)
{
int err;
@@ -1642,6 +1652,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
unsigned int offset;
size_t total_len = 0;
unsigned int num_pages;
+ struct fuse_conn *fc = fm->fc;
struct fuse_retrieve_args *ra;
size_t args_size = sizeof(*ra);
struct fuse_args_pages *ap;
@@ -1703,9 +1714,9 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
args->in_args[0].value = &ra->inarg;
args->in_args[1].size = total_len;
- err = fuse_simple_notify_reply(fc, args, outarg->notify_unique);
+ err = fuse_simple_notify_reply(fm, args, outarg->notify_unique);
if (err)
- fuse_retrieve_end(fc, args, err);
+ fuse_retrieve_end(fm, args, err);
return err;
}
@@ -1714,7 +1725,9 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
struct fuse_copy_state *cs)
{
struct fuse_notify_retrieve_out outarg;
+ struct fuse_mount *fm;
struct inode *inode;
+ u64 nodeid;
int err;
err = -EINVAL;
@@ -1729,14 +1742,12 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
down_read(&fc->killsb);
err = -ENOENT;
- if (fc->sb) {
- u64 nodeid = outarg.nodeid;
+ nodeid = outarg.nodeid;
- inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
- if (inode) {
- err = fuse_retrieve(fc, inode, &outarg);
- iput(inode);
- }
+ inode = fuse_ilookup(fc, nodeid, &fm);
+ if (inode) {
+ err = fuse_retrieve(fm, inode, &outarg);
+ iput(inode);
}
up_read(&fc->killsb);
@@ -1875,9 +1886,9 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
else if (oh.error == -ENOSYS)
fc->no_interrupt = 1;
else if (oh.error == -EAGAIN)
- err = queue_interrupt(&fc->iq, req);
+ err = queue_interrupt(req);
- fuse_put_request(fc, req);
+ fuse_put_request(req);
goto copy_finish;
}
@@ -1907,7 +1918,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
list_del_init(&req->list);
spin_unlock(&fpq->lock);
- fuse_request_end(fc, req);
+ fuse_request_end(req);
out:
return err ? err : nbytes;
@@ -2045,7 +2056,7 @@ static __poll_t fuse_dev_poll(struct file *file, poll_table *wait)
}
/* Abort all requests on the given list (pending or processing) */
-static void end_requests(struct fuse_conn *fc, struct list_head *head)
+static void end_requests(struct list_head *head)
{
while (!list_empty(head)) {
struct fuse_req *req;
@@ -2053,7 +2064,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head)
req->out.h.error = -ECONNABORTED;
clear_bit(FR_SENT, &req->flags);
list_del_init(&req->list);
- fuse_request_end(fc, req);
+ fuse_request_end(req);
}
}
@@ -2148,7 +2159,7 @@ void fuse_abort_conn(struct fuse_conn *fc)
wake_up_all(&fc->blocked_waitq);
spin_unlock(&fc->lock);
- end_requests(fc, &to_end);
+ end_requests(&to_end);
} else {
spin_unlock(&fc->lock);
}
@@ -2178,7 +2189,7 @@ int fuse_dev_release(struct inode *inode, struct file *file)
list_splice_init(&fpq->processing[i], &to_end);
spin_unlock(&fpq->lock);
- end_requests(fc, &to_end);
+ end_requests(&to_end);
/* Are we the last open device? */
if (atomic_dec_and_test(&fc->dev_count)) {
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 26f028bc760b..ff7dbeb16f88 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -10,6 +10,7 @@
#include <linux/pagemap.h>
#include <linux/file.h>
+#include <linux/fs_context.h>
#include <linux/sched.h>
#include <linux/namei.h>
#include <linux/slab.h>
@@ -196,7 +197,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
{
struct inode *inode;
struct dentry *parent;
- struct fuse_conn *fc;
+ struct fuse_mount *fm;
struct fuse_inode *fi;
int ret;
@@ -218,27 +219,29 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
if (flags & LOOKUP_RCU)
goto out;
- fc = get_fuse_conn(inode);
+ fm = get_fuse_mount(inode);
forget = fuse_alloc_forget();
ret = -ENOMEM;
if (!forget)
goto out;
- attr_version = fuse_get_attr_version(fc);
+ attr_version = fuse_get_attr_version(fm->fc);
parent = dget_parent(entry);
- fuse_lookup_init(fc, &args, get_node_id(d_inode(parent)),
+ fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)),
&entry->d_name, &outarg);
- ret = fuse_simple_request(fc, &args);
+ ret = fuse_simple_request(fm, &args);
dput(parent);
/* Zero nodeid is same as -ENOENT */
if (!ret && !outarg.nodeid)
ret = -ENOENT;
if (!ret) {
fi = get_fuse_inode(inode);
- if (outarg.nodeid != get_node_id(inode)) {
- fuse_queue_forget(fc, forget, outarg.nodeid, 1);
+ if (outarg.nodeid != get_node_id(inode) ||
+ (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
+ fuse_queue_forget(fm->fc, forget,
+ outarg.nodeid, 1);
goto invalid;
}
spin_lock(&fi->lock);
@@ -298,6 +301,79 @@ static int fuse_dentry_delete(const struct dentry *dentry)
return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
}
+/*
+ * Create a fuse_mount object with a new superblock (with path->dentry
+ * as the root), and return that mount so it can be auto-mounted on
+ * @path.
+ */
+static struct vfsmount *fuse_dentry_automount(struct path *path)
+{
+ struct fs_context *fsc;
+ struct fuse_mount *parent_fm = get_fuse_mount_super(path->mnt->mnt_sb);
+ struct fuse_conn *fc = parent_fm->fc;
+ struct fuse_mount *fm;
+ struct vfsmount *mnt;
+ struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry));
+ struct super_block *sb;
+ int err;
+
+ fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
+ if (IS_ERR(fsc)) {
+ err = PTR_ERR(fsc);
+ goto out;
+ }
+
+ err = -ENOMEM;
+ fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
+ if (!fm)
+ goto out_put_fsc;
+
+ refcount_set(&fm->count, 1);
+ fsc->s_fs_info = fm;
+ sb = sget_fc(fsc, NULL, set_anon_super_fc);
+ if (IS_ERR(sb)) {
+ err = PTR_ERR(sb);
+ fuse_mount_put(fm);
+ goto out_put_fsc;
+ }
+ fm->fc = fuse_conn_get(fc);
+
+ /* Initialize superblock, making @mp_fi its root */
+ err = fuse_fill_super_submount(sb, mp_fi);
+ if (err)
+ goto out_put_sb;
+
+ sb->s_flags |= SB_ACTIVE;
+ fsc->root = dget(sb->s_root);
+ /* We are done configuring the superblock, so unlock it */
+ up_write(&sb->s_umount);
+
+ down_write(&fc->killsb);
+ list_add_tail(&fm->fc_entry, &fc->mounts);
+ up_write(&fc->killsb);
+
+ /* Create the submount */
+ mnt = vfs_create_mount(fsc);
+ if (IS_ERR(mnt)) {
+ err = PTR_ERR(mnt);
+ goto out_put_fsc;
+ }
+ mntget(mnt);
+ put_fs_context(fsc);
+ return mnt;
+
+out_put_sb:
+ /*
+ * Only jump here when fsc->root is NULL and sb is still locked
+ * (otherwise put_fs_context() will put the superblock)
+ */
+ deactivate_locked_super(sb);
+out_put_fsc:
+ put_fs_context(fsc);
+out:
+ return ERR_PTR(err);
+}
+
const struct dentry_operations fuse_dentry_operations = {
.d_revalidate = fuse_dentry_revalidate,
.d_delete = fuse_dentry_delete,
@@ -305,6 +381,7 @@ const struct dentry_operations fuse_dentry_operations = {
.d_init = fuse_dentry_init,
.d_release = fuse_dentry_release,
#endif
+ .d_automount = fuse_dentry_automount,
};
const struct dentry_operations fuse_root_dentry_operations = {
@@ -329,7 +406,7 @@ bool fuse_invalid_attr(struct fuse_attr *attr)
int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
struct fuse_entry_out *outarg, struct inode **inode)
{
- struct fuse_conn *fc = get_fuse_conn_super(sb);
+ struct fuse_mount *fm = get_fuse_mount_super(sb);
FUSE_ARGS(args);
struct fuse_forget_link *forget;
u64 attr_version;
@@ -346,10 +423,10 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
if (!forget)
goto out;
- attr_version = fuse_get_attr_version(fc);
+ attr_version = fuse_get_attr_version(fm->fc);
- fuse_lookup_init(fc, &args, nodeid, name, outarg);
- err = fuse_simple_request(fc, &args);
+ fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
+ err = fuse_simple_request(fm, &args);
/* Zero nodeid is same as -ENOENT, but with valid timeout */
if (err || !outarg->nodeid)
goto out_put_forget;
@@ -365,7 +442,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
attr_version);
err = -ENOMEM;
if (!*inode) {
- fuse_queue_forget(fc, forget, outarg->nodeid, 1);
+ fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
goto out;
}
err = 0;
@@ -434,7 +511,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
{
int err;
struct inode *inode;
- struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_mount *fm = get_fuse_mount(dir);
FUSE_ARGS(args);
struct fuse_forget_link *forget;
struct fuse_create_in inarg;
@@ -452,11 +529,11 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
goto out_err;
err = -ENOMEM;
- ff = fuse_file_alloc(fc);
+ ff = fuse_file_alloc(fm);
if (!ff)
goto out_put_forget_req;
- if (!fc->dont_mask)
+ if (!fm->fc->dont_mask)
mode &= ~current_umask();
flags &= ~O_NOCTTY;
@@ -477,7 +554,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
args.out_args[0].value = &outentry;
args.out_args[1].size = sizeof(outopen);
args.out_args[1].value = &outopen;
- err = fuse_simple_request(fc, &args);
+ err = fuse_simple_request(fm, &args);
if (err)
goto out_free_ff;
@@ -494,7 +571,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
if (!inode) {
flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
fuse_sync_release(NULL, ff, flags);
- fuse_queue_forget(fc, forget, outentry.nodeid, 1);
+ fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1);
err = -ENOMEM;
goto out_err;
}
@@ -567,7 +644,7 @@ no_open:
/*
* Code shared between mknod, mkdir, symlink and link
*/
-static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
+static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args,
struct inode *dir, struct dentry *entry,
umode_t mode)
{
@@ -586,7 +663,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
args->out_numargs = 1;
args->out_args[0].size = sizeof(outarg);
args->out_args[0].value = &outarg;
- err = fuse_simple_request(fc, args);
+ err = fuse_simple_request(fm, args);
if (err)
goto out_put_forget_req;
@@ -600,7 +677,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
&outarg.attr, entry_attr_timeout(&outarg), 0);
if (!inode) {
- fuse_queue_forget(fc, forget, outarg.nodeid, 1);
+ fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
return -ENOMEM;
}
kfree(forget);
@@ -628,10 +705,10 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
dev_t rdev)
{
struct fuse_mknod_in inarg;
- struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_mount *fm = get_fuse_mount(dir);
FUSE_ARGS(args);
- if (!fc->dont_mask)
+ if (!fm->fc->dont_mask)
mode &= ~current_umask();
memset(&inarg, 0, sizeof(inarg));
@@ -644,7 +721,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
args.in_args[0].value = &inarg;
args.in_args[1].size = entry->d_name.len + 1;
args.in_args[1].value = entry->d_name.name;
- return create_new_entry(fc, &args, dir, entry, mode);
+ return create_new_entry(fm, &args, dir, entry, mode);
}
static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
@@ -656,10 +733,10 @@ static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
{
struct fuse_mkdir_in inarg;
- struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_mount *fm = get_fuse_mount(dir);
FUSE_ARGS(args);
- if (!fc->dont_mask)
+ if (!fm->fc->dont_mask)
mode &= ~current_umask();
memset(&inarg, 0, sizeof(inarg));
@@ -671,13 +748,13 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
args.in_args[0].value = &inarg;
args.in_args[1].size = entry->d_name.len + 1;
args.in_args[1].value = entry->d_name.name;
- return create_new_entry(fc, &args, dir, entry, S_IFDIR);
+ return create_new_entry(fm, &args, dir, entry, S_IFDIR);
}
static int fuse_symlink(struct inode *dir, struct dentry *entry,
const char *link)
{
- struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_mount *fm = get_fuse_mount(dir);
unsigned len = strlen(link) + 1;
FUSE_ARGS(args);
@@ -687,7 +764,7 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
args.in_args[0].value = entry->d_name.name;
args.in_args[1].size = len;
args.in_args[1].value = link;
- return create_new_entry(fc, &args, dir, entry, S_IFLNK);
+ return create_new_entry(fm, &args, dir, entry, S_IFLNK);
}
void fuse_update_ctime(struct inode *inode)
@@ -701,7 +778,7 @@ void fuse_update_ctime(struct inode *inode)
static int fuse_unlink(struct inode *dir, struct dentry *entry)
{
int err;
- struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_mount *fm = get_fuse_mount(dir);
FUSE_ARGS(args);
args.opcode = FUSE_UNLINK;
@@ -709,13 +786,13 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
args.in_numargs = 1;
args.in_args[0].size = entry->d_name.len + 1;
args.in_args[0].value = entry->d_name.name;
- err = fuse_simple_request(fc, &args);
+ err = fuse_simple_request(fm, &args);
if (!err) {
struct inode *inode = d_inode(entry);
struct fuse_inode *fi = get_fuse_inode(inode);
spin_lock(&fi->lock);
- fi->attr_version = atomic64_inc_return(&fc->attr_version);
+ fi->attr_version = atomic64_inc_return(&fm->fc->attr_version);
/*
* If i_nlink == 0 then unlink doesn't make sense, yet this can
* happen if userspace filesystem is careless. It would be
@@ -737,7 +814,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
static int fuse_rmdir(struct inode *dir, struct dentry *entry)
{
int err;
- struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_mount *fm = get_fuse_mount(dir);
FUSE_ARGS(args);
args.opcode = FUSE_RMDIR;
@@ -745,7 +822,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
args.in_numargs = 1;
args.in_args[0].size = entry->d_name.len + 1;
args.in_args[0].value = entry->d_name.name;
- err = fuse_simple_request(fc, &args);
+ err = fuse_simple_request(fm, &args);
if (!err) {
clear_nlink(d_inode(entry));
fuse_dir_changed(dir);
@@ -761,7 +838,7 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
{
int err;
struct fuse_rename2_in inarg;
- struct fuse_conn *fc = get_fuse_conn(olddir);
+ struct fuse_mount *fm = get_fuse_mount(olddir);
FUSE_ARGS(args);
memset(&inarg, 0, argsize);
@@ -776,7 +853,7 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
args.in_args[1].value = oldent->d_name.name;
args.in_args[2].size = newent->d_name.len + 1;
args.in_args[2].value = newent->d_name.name;
- err = fuse_simple_request(fc, &args);
+ err = fuse_simple_request(fm, &args);
if (!err) {
/* ctime changes */
fuse_invalidate_attr(d_inode(oldent));
@@ -847,7 +924,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
int err;
struct fuse_link_in inarg;
struct inode *inode = d_inode(entry);
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
memset(&inarg, 0, sizeof(inarg));
@@ -858,7 +935,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
args.in_args[0].value = &inarg;
args.in_args[1].size = newent->d_name.len + 1;
args.in_args[1].value = newent->d_name.name;
- err = create_new_entry(fc, &args, newdir, newent, inode->i_mode);
+ err = create_new_entry(fm, &args, newdir, newent, inode->i_mode);
/* Contrary to "normal" filesystems it can happen that link
makes two "logical" inodes point to the same "physical"
inode. We invalidate the attributes of the old one, so it
@@ -869,7 +946,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
struct fuse_inode *fi = get_fuse_inode(inode);
spin_lock(&fi->lock);
- fi->attr_version = atomic64_inc_return(&fc->attr_version);
+ fi->attr_version = atomic64_inc_return(&fm->fc->attr_version);
if (likely(inode->i_nlink < UINT_MAX))
inc_nlink(inode);
spin_unlock(&fi->lock);
@@ -926,11 +1003,11 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
int err;
struct fuse_getattr_in inarg;
struct fuse_attr_out outarg;
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
u64 attr_version;
- attr_version = fuse_get_attr_version(fc);
+ attr_version = fuse_get_attr_version(fm->fc);
memset(&inarg, 0, sizeof(inarg));
memset(&outarg, 0, sizeof(outarg));
@@ -949,7 +1026,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
args.out_numargs = 1;
args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg;
- err = fuse_simple_request(fc, &args);
+ err = fuse_simple_request(fm, &args);
if (!err) {
if (fuse_invalid_attr(&outarg.attr) ||
(inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
@@ -1002,7 +1079,7 @@ int fuse_update_attributes(struct inode *inode, struct file *file)
STATX_BASIC_STATS & ~STATX_ATIME, 0);
}
-int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
+int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
u64 child_nodeid, struct qstr *name)
{
int err = -ENOTDIR;
@@ -1010,7 +1087,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
struct dentry *dir;
struct dentry *entry;
- parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid);
+ parent = fuse_ilookup(fc, parent_nodeid, NULL);
if (!parent)
return -ENOENT;
@@ -1102,14 +1179,14 @@ int fuse_allow_current_process(struct fuse_conn *fc)
static int fuse_access(struct inode *inode, int mask)
{
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
struct fuse_access_in inarg;
int err;
BUG_ON(mask & MAY_NOT_BLOCK);
- if (fc->no_access)
+ if (fm->fc->no_access)
return 0;
memset(&inarg, 0, sizeof(inarg));
@@ -1119,9 +1196,9 @@ static int fuse_access(struct inode *inode, int mask)
args.in_numargs = 1;
args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg;
- err = fuse_simple_request(fc, &args);
+ err = fuse_simple_request(fm, &args);
if (err == -ENOSYS) {
- fc->no_access = 1;
+ fm->fc->no_access = 1;
err = 0;
}
return err;
@@ -1209,7 +1286,7 @@ static int fuse_permission(struct inode *inode, int mask)
static int fuse_readlink_page(struct inode *inode, struct page *page)
{
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 };
struct fuse_args_pages ap = {
.num_pages = 1,
@@ -1226,7 +1303,7 @@ static int fuse_readlink_page(struct inode *inode, struct page *page)
ap.args.page_zeroing = true;
ap.args.out_numargs = 1;
ap.args.out_args[0].size = desc.length;
- res = fuse_simple_request(fc, &ap.args);
+ res = fuse_simple_request(fm, &ap.args);
fuse_invalidate_atime(inode);
@@ -1454,7 +1531,7 @@ static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
*/
int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
{
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
struct fuse_setattr_in inarg;
struct fuse_attr_out outarg;
@@ -1465,7 +1542,7 @@ int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
inarg.valid = FATTR_MTIME;
inarg.mtime = inode->i_mtime.tv_sec;
inarg.mtimensec = inode->i_mtime.tv_nsec;
- if (fc->minor >= 23) {
+ if (fm->fc->minor >= 23) {
inarg.valid |= FATTR_CTIME;
inarg.ctime = inode->i_ctime.tv_sec;
inarg.ctimensec = inode->i_ctime.tv_nsec;
@@ -1474,9 +1551,9 @@ int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
inarg.valid |= FATTR_FH;
inarg.fh = ff->fh;
}
- fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
+ fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg);
- return fuse_simple_request(fc, &args);
+ return fuse_simple_request(fm, &args);
}
/*
@@ -1491,7 +1568,8 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
struct file *file)
{
struct inode *inode = d_inode(dentry);
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
+ struct fuse_conn *fc = fm->fc;
struct fuse_inode *fi = get_fuse_inode(inode);
FUSE_ARGS(args);
struct fuse_setattr_in inarg;
@@ -1501,6 +1579,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
loff_t oldsize;
int err;
bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
+ bool fault_blocked = false;
if (!fc->default_permissions)
attr->ia_valid |= ATTR_FORCE;
@@ -1509,6 +1588,22 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
if (err)
return err;
+ if (attr->ia_valid & ATTR_SIZE) {
+ if (WARN_ON(!S_ISREG(inode->i_mode)))
+ return -EIO;
+ is_truncate = true;
+ }
+
+ if (FUSE_IS_DAX(inode) && is_truncate) {
+ down_write(&fi->i_mmap_sem);
+ fault_blocked = true;
+ err = fuse_dax_break_layouts(inode, 0, 0);
+ if (err) {
+ up_write(&fi->i_mmap_sem);
+ return err;
+ }
+ }
+
if (attr->ia_valid & ATTR_OPEN) {
/* This is coming from open(..., ... | O_TRUNC); */
WARN_ON(!(attr->ia_valid & ATTR_SIZE));
@@ -1521,17 +1616,11 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
*/
i_size_write(inode, 0);
truncate_pagecache(inode, 0);
- return 0;
+ goto out;
}
file = NULL;
}
- if (attr->ia_valid & ATTR_SIZE) {
- if (WARN_ON(!S_ISREG(inode->i_mode)))
- return -EIO;
- is_truncate = true;
- }
-
/* Flush dirty data/metadata before non-truncate SETATTR */
if (is_wb && S_ISREG(inode->i_mode) &&
attr->ia_valid &
@@ -1566,7 +1655,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
}
fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
- err = fuse_simple_request(fc, &args);
+ err = fuse_simple_request(fm, &args);
if (err) {
if (err == -EINTR)
fuse_invalidate_attr(inode);
@@ -1614,6 +1703,10 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
}
clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
+out:
+ if (fault_blocked)
+ up_write(&fi->i_mmap_sem);
+
return 0;
error:
@@ -1621,6 +1714,9 @@ error:
fuse_release_nowrite(inode);
clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
+
+ if (fault_blocked)
+ up_write(&fi->i_mmap_sem);
return err;
}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 43c165e796da..c03034e8c152 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -32,7 +32,7 @@ static struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags,
return pages;
}
-static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
+static int fuse_send_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
int opcode, struct fuse_open_out *outargp)
{
struct fuse_open_in inarg;
@@ -40,7 +40,7 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
memset(&inarg, 0, sizeof(inarg));
inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
- if (!fc->atomic_o_trunc)
+ if (!fm->fc->atomic_o_trunc)
inarg.flags &= ~O_TRUNC;
args.opcode = opcode;
args.nodeid = nodeid;
@@ -51,7 +51,7 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
args.out_args[0].size = sizeof(*outargp);
args.out_args[0].value = outargp;
- return fuse_simple_request(fc, &args);
+ return fuse_simple_request(fm, &args);
}
struct fuse_release_args {
@@ -60,7 +60,7 @@ struct fuse_release_args {
struct inode *inode;
};
-struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
+struct fuse_file *fuse_file_alloc(struct fuse_mount *fm)
{
struct fuse_file *ff;
@@ -68,7 +68,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
if (unlikely(!ff))
return NULL;
- ff->fc = fc;
+ ff->fm = fm;
ff->release_args = kzalloc(sizeof(*ff->release_args),
GFP_KERNEL_ACCOUNT);
if (!ff->release_args) {
@@ -82,7 +82,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
RB_CLEAR_NODE(&ff->polled_node);
init_waitqueue_head(&ff->poll_wait);
- ff->kh = atomic64_inc_return(&fc->khctr);
+ ff->kh = atomic64_inc_return(&fm->fc->khctr);
return ff;
}
@@ -100,7 +100,7 @@ static struct fuse_file *fuse_file_get(struct fuse_file *ff)
return ff;
}
-static void fuse_release_end(struct fuse_conn *fc, struct fuse_args *args,
+static void fuse_release_end(struct fuse_mount *fm, struct fuse_args *args,
int error)
{
struct fuse_release_args *ra = container_of(args, typeof(*ra), args);
@@ -114,29 +114,30 @@ static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir)
if (refcount_dec_and_test(&ff->count)) {
struct fuse_args *args = &ff->release_args->args;
- if (isdir ? ff->fc->no_opendir : ff->fc->no_open) {
+ if (isdir ? ff->fm->fc->no_opendir : ff->fm->fc->no_open) {
/* Do nothing when client does not implement 'open' */
- fuse_release_end(ff->fc, args, 0);
+ fuse_release_end(ff->fm, args, 0);
} else if (sync) {
- fuse_simple_request(ff->fc, args);
- fuse_release_end(ff->fc, args, 0);
+ fuse_simple_request(ff->fm, args);
+ fuse_release_end(ff->fm, args, 0);
} else {
args->end = fuse_release_end;
- if (fuse_simple_background(ff->fc, args,
+ if (fuse_simple_background(ff->fm, args,
GFP_KERNEL | __GFP_NOFAIL))
- fuse_release_end(ff->fc, args, -ENOTCONN);
+ fuse_release_end(ff->fm, args, -ENOTCONN);
}
kfree(ff);
}
}
-int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
+int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
bool isdir)
{
+ struct fuse_conn *fc = fm->fc;
struct fuse_file *ff;
int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
- ff = fuse_file_alloc(fc);
+ ff = fuse_file_alloc(fm);
if (!ff)
return -ENOMEM;
@@ -147,7 +148,7 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
struct fuse_open_out outarg;
int err;
- err = fuse_send_open(fc, nodeid, file, opcode, &outarg);
+ err = fuse_send_open(fm, nodeid, file, opcode, &outarg);
if (!err) {
ff->fh = outarg.fh;
ff->open_flags = outarg.open_flags;
@@ -216,27 +217,40 @@ void fuse_finish_open(struct inode *inode, struct file *file)
int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
{
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
+ struct fuse_conn *fc = fm->fc;
int err;
bool is_wb_truncate = (file->f_flags & O_TRUNC) &&
fc->atomic_o_trunc &&
fc->writeback_cache;
+ bool dax_truncate = (file->f_flags & O_TRUNC) &&
+ fc->atomic_o_trunc && FUSE_IS_DAX(inode);
err = generic_file_open(inode, file);
if (err)
return err;
- if (is_wb_truncate) {
+ if (is_wb_truncate || dax_truncate) {
inode_lock(inode);
fuse_set_nowrite(inode);
}
- err = fuse_do_open(fc, get_node_id(inode), file, isdir);
+ if (dax_truncate) {
+ down_write(&get_fuse_inode(inode)->i_mmap_sem);
+ err = fuse_dax_break_layouts(inode, 0, 0);
+ if (err)
+ goto out;
+ }
+ err = fuse_do_open(fm, get_node_id(inode), file, isdir);
if (!err)
fuse_finish_open(inode, file);
- if (is_wb_truncate) {
+out:
+ if (dax_truncate)
+ up_write(&get_fuse_inode(inode)->i_mmap_sem);
+
+ if (is_wb_truncate | dax_truncate) {
fuse_release_nowrite(inode);
inode_unlock(inode);
}
@@ -247,7 +261,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
int flags, int opcode)
{
- struct fuse_conn *fc = ff->fc;
+ struct fuse_conn *fc = ff->fm->fc;
struct fuse_release_args *ra = ff->release_args;
/* Inode is NULL on error path of fuse_create_open() */
@@ -285,7 +299,7 @@ void fuse_release_common(struct file *file, bool isdir)
if (ff->flock) {
ra->inarg.release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
- ra->inarg.lock_owner = fuse_lock_owner_id(ff->fc,
+ ra->inarg.lock_owner = fuse_lock_owner_id(ff->fm->fc,
(fl_owner_t) file);
}
/* Hold inode until release is finished */
@@ -300,7 +314,7 @@ void fuse_release_common(struct file *file, bool isdir)
* synchronous RELEASE is allowed (and desirable) in this case
* because the server can be trusted not to screw up.
*/
- fuse_file_put(ff, ff->fc->destroy, isdir);
+ fuse_file_put(ff, ff->fm->fc->destroy, isdir);
}
static int fuse_open(struct inode *inode, struct file *file)
@@ -443,7 +457,7 @@ static void fuse_sync_writes(struct inode *inode)
static int fuse_flush(struct file *file, fl_owner_t id)
{
struct inode *inode = file_inode(file);
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_file *ff = file->private_data;
struct fuse_flush_in inarg;
FUSE_ARGS(args);
@@ -465,12 +479,12 @@ static int fuse_flush(struct file *file, fl_owner_t id)
return err;
err = 0;
- if (fc->no_flush)
+ if (fm->fc->no_flush)
goto inval_attr_out;
memset(&inarg, 0, sizeof(inarg));
inarg.fh = ff->fh;
- inarg.lock_owner = fuse_lock_owner_id(fc, id);
+ inarg.lock_owner = fuse_lock_owner_id(fm->fc, id);
args.opcode = FUSE_FLUSH;
args.nodeid = get_node_id(inode);
args.in_numargs = 1;
@@ -478,9 +492,9 @@ static int fuse_flush(struct file *file, fl_owner_t id)
args.in_args[0].value = &inarg;
args.force = true;
- err = fuse_simple_request(fc, &args);
+ err = fuse_simple_request(fm, &args);
if (err == -ENOSYS) {
- fc->no_flush = 1;
+ fm->fc->no_flush = 1;
err = 0;
}
@@ -489,7 +503,7 @@ inval_attr_out:
* In memory i_blocks is not maintained by fuse, if writeback cache is
* enabled, i_blocks from cached attr may not be accurate.
*/
- if (!err && fc->writeback_cache)
+ if (!err && fm->fc->writeback_cache)
fuse_invalidate_attr(inode);
return err;
}
@@ -498,7 +512,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
int datasync, int opcode)
{
struct inode *inode = file->f_mapping->host;
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_file *ff = file->private_data;
FUSE_ARGS(args);
struct fuse_fsync_in inarg;
@@ -511,7 +525,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
args.in_numargs = 1;
args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg;
- return fuse_simple_request(fc, &args);
+ return fuse_simple_request(fm, &args);
}
static int fuse_fsync(struct file *file, loff_t start, loff_t end,
@@ -686,7 +700,7 @@ static void fuse_io_free(struct fuse_io_args *ia)
kfree(ia);
}
-static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_args *args,
+static void fuse_aio_complete_req(struct fuse_mount *fm, struct fuse_args *args,
int err)
{
struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args);
@@ -715,7 +729,7 @@ static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_args *args,
fuse_io_free(ia);
}
-static ssize_t fuse_async_req_send(struct fuse_conn *fc,
+static ssize_t fuse_async_req_send(struct fuse_mount *fm,
struct fuse_io_args *ia, size_t num_bytes)
{
ssize_t err;
@@ -729,9 +743,9 @@ static ssize_t fuse_async_req_send(struct fuse_conn *fc,
ia->ap.args.end = fuse_aio_complete_req;
ia->ap.args.may_block = io->should_dirty;
- err = fuse_simple_background(fc, &ia->ap.args, GFP_KERNEL);
+ err = fuse_simple_background(fm, &ia->ap.args, GFP_KERNEL);
if (err)
- fuse_aio_complete_req(fc, &ia->ap.args, err);
+ fuse_aio_complete_req(fm, &ia->ap.args, err);
return num_bytes;
}
@@ -741,18 +755,18 @@ static ssize_t fuse_send_read(struct fuse_io_args *ia, loff_t pos, size_t count,
{
struct file *file = ia->io->iocb->ki_filp;
struct fuse_file *ff = file->private_data;
- struct fuse_conn *fc = ff->fc;
+ struct fuse_mount *fm = ff->fm;
fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
if (owner != NULL) {
ia->read.in.read_flags |= FUSE_READ_LOCKOWNER;
- ia->read.in.lock_owner = fuse_lock_owner_id(fc, owner);
+ ia->read.in.lock_owner = fuse_lock_owner_id(fm->fc, owner);
}
if (ia->io->async)
- return fuse_async_req_send(fc, ia, count);
+ return fuse_async_req_send(fm, ia, count);
- return fuse_simple_request(fc, &ia->ap.args);
+ return fuse_simple_request(fm, &ia->ap.args);
}
static void fuse_read_update_size(struct inode *inode, loff_t size,
@@ -798,7 +812,7 @@ static void fuse_short_read(struct inode *inode, u64 attr_ver, size_t num_read,
static int fuse_do_readpage(struct file *file, struct page *page)
{
struct inode *inode = page->mapping->host;
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
loff_t pos = page_offset(page);
struct fuse_page_desc desc = { .length = PAGE_SIZE };
struct fuse_io_args ia = {
@@ -818,14 +832,14 @@ static int fuse_do_readpage(struct file *file, struct page *page)
*/
fuse_wait_on_page_writeback(inode, page->index);
- attr_ver = fuse_get_attr_version(fc);
+ attr_ver = fuse_get_attr_version(fm->fc);
/* Don't overflow end offset */
if (pos + (desc.length - 1) == LLONG_MAX)
desc.length--;
fuse_read_args_fill(&ia, file, pos, desc.length, FUSE_READ);
- res = fuse_simple_request(fc, &ia.ap.args);
+ res = fuse_simple_request(fm, &ia.ap.args);
if (res < 0)
return res;
/*
@@ -855,7 +869,7 @@ static int fuse_readpage(struct file *file, struct page *page)
return err;
}
-static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_args *args,
+static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
int err)
{
int i;
@@ -899,7 +913,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_args *args,
static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
{
struct fuse_file *ff = file->private_data;
- struct fuse_conn *fc = ff->fc;
+ struct fuse_mount *fm = ff->fm;
struct fuse_args_pages *ap = &ia->ap;
loff_t pos = page_offset(ap->pages[0]);
size_t count = ap->num_pages << PAGE_SHIFT;
@@ -918,18 +932,18 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
WARN_ON((loff_t) (pos + count) < 0);
fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
- ia->read.attr_ver = fuse_get_attr_version(fc);
- if (fc->async_read) {
+ ia->read.attr_ver = fuse_get_attr_version(fm->fc);
+ if (fm->fc->async_read) {
ia->ff = fuse_file_get(ff);
ap->args.end = fuse_readpages_end;
- err = fuse_simple_background(fc, &ap->args, GFP_KERNEL);
+ err = fuse_simple_background(fm, &ap->args, GFP_KERNEL);
if (!err)
return;
} else {
- res = fuse_simple_request(fc, &ap->args);
+ res = fuse_simple_request(fm, &ap->args);
err = res < 0 ? res : 0;
}
- fuse_readpages_end(fc, &ap->args, err);
+ fuse_readpages_end(fm, &ap->args, err);
}
static void fuse_readahead(struct readahead_control *rac)
@@ -1000,7 +1014,7 @@ static void fuse_write_args_fill(struct fuse_io_args *ia, struct fuse_file *ff,
args->opcode = FUSE_WRITE;
args->nodeid = ff->nodeid;
args->in_numargs = 2;
- if (ff->fc->minor < 9)
+ if (ff->fm->fc->minor < 9)
args->in_args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
else
args->in_args[0].size = sizeof(ia->write.in);
@@ -1029,7 +1043,7 @@ static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos,
struct kiocb *iocb = ia->io->iocb;
struct file *file = iocb->ki_filp;
struct fuse_file *ff = file->private_data;
- struct fuse_conn *fc = ff->fc;
+ struct fuse_mount *fm = ff->fm;
struct fuse_write_in *inarg = &ia->write.in;
ssize_t err;
@@ -1037,13 +1051,13 @@ static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos,
inarg->flags = fuse_write_flags(iocb);
if (owner != NULL) {
inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
- inarg->lock_owner = fuse_lock_owner_id(fc, owner);
+ inarg->lock_owner = fuse_lock_owner_id(fm->fc, owner);
}
if (ia->io->async)
- return fuse_async_req_send(fc, ia, count);
+ return fuse_async_req_send(fm, ia, count);
- err = fuse_simple_request(fc, &ia->ap.args);
+ err = fuse_simple_request(fm, &ia->ap.args);
if (!err && ia->write.out.size > count)
err = -EIO;
@@ -1074,7 +1088,7 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
struct fuse_args_pages *ap = &ia->ap;
struct file *file = iocb->ki_filp;
struct fuse_file *ff = file->private_data;
- struct fuse_conn *fc = ff->fc;
+ struct fuse_mount *fm = ff->fm;
unsigned int offset, i;
int err;
@@ -1084,7 +1098,7 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
fuse_write_args_fill(ia, ff, pos, count);
ia->write.in.flags = fuse_write_flags(iocb);
- err = fuse_simple_request(fc, &ap->args);
+ err = fuse_simple_request(fm, &ap->args);
if (!err && ia->write.out.size > count)
err = -EIO;
@@ -1399,7 +1413,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
struct file *file = io->iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
struct fuse_file *ff = file->private_data;
- struct fuse_conn *fc = ff->fc;
+ struct fuse_conn *fc = ff->fm->fc;
size_t nmax = write ? fc->max_write : fc->max_read;
loff_t pos = *ppos;
size_t count = iov_iter_count(iter);
@@ -1539,10 +1553,14 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *file = iocb->ki_filp;
struct fuse_file *ff = file->private_data;
+ struct inode *inode = file_inode(file);
- if (is_bad_inode(file_inode(file)))
+ if (is_bad_inode(inode))
return -EIO;
+ if (FUSE_IS_DAX(inode))
+ return fuse_dax_read_iter(iocb, to);
+
if (!(ff->open_flags & FOPEN_DIRECT_IO))
return fuse_cache_read_iter(iocb, to);
else
@@ -1553,10 +1571,14 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct fuse_file *ff = file->private_data;
+ struct inode *inode = file_inode(file);
- if (is_bad_inode(file_inode(file)))
+ if (is_bad_inode(inode))
return -EIO;
+ if (FUSE_IS_DAX(inode))
+ return fuse_dax_write_iter(iocb, from);
+
if (!(ff->open_flags & FOPEN_DIRECT_IO))
return fuse_cache_write_iter(iocb, from);
else
@@ -1578,7 +1600,7 @@ static void fuse_writepage_free(struct fuse_writepage_args *wpa)
kfree(wpa);
}
-static void fuse_writepage_finish(struct fuse_conn *fc,
+static void fuse_writepage_finish(struct fuse_mount *fm,
struct fuse_writepage_args *wpa)
{
struct fuse_args_pages *ap = &wpa->ia.ap;
@@ -1596,7 +1618,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc,
}
/* Called under fi->lock, may release and reacquire it */
-static void fuse_send_writepage(struct fuse_conn *fc,
+static void fuse_send_writepage(struct fuse_mount *fm,
struct fuse_writepage_args *wpa, loff_t size)
__releases(fi->lock)
__acquires(fi->lock)
@@ -1622,10 +1644,10 @@ __acquires(fi->lock)
args->force = true;
args->nocreds = true;
- err = fuse_simple_background(fc, args, GFP_ATOMIC);
+ err = fuse_simple_background(fm, args, GFP_ATOMIC);
if (err == -ENOMEM) {
spin_unlock(&fi->lock);
- err = fuse_simple_background(fc, args, GFP_NOFS | __GFP_NOFAIL);
+ err = fuse_simple_background(fm, args, GFP_NOFS | __GFP_NOFAIL);
spin_lock(&fi->lock);
}
@@ -1638,7 +1660,7 @@ __acquires(fi->lock)
out_free:
fi->writectr--;
rb_erase(&wpa->writepages_entry, &fi->writepages);
- fuse_writepage_finish(fc, wpa);
+ fuse_writepage_finish(fm, wpa);
spin_unlock(&fi->lock);
/* After fuse_writepage_finish() aux request list is private */
@@ -1662,7 +1684,7 @@ void fuse_flush_writepages(struct inode *inode)
__releases(fi->lock)
__acquires(fi->lock)
{
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
loff_t crop = i_size_read(inode);
struct fuse_writepage_args *wpa;
@@ -1671,7 +1693,7 @@ __acquires(fi->lock)
wpa = list_entry(fi->queued_writes.next,
struct fuse_writepage_args, queue_entry);
list_del_init(&wpa->queue_entry);
- fuse_send_writepage(fc, wpa, crop);
+ fuse_send_writepage(fm, wpa, crop);
}
}
@@ -1712,7 +1734,7 @@ static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa)
WARN_ON(fuse_insert_writeback(root, wpa));
}
-static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args,
+static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args,
int error)
{
struct fuse_writepage_args *wpa =
@@ -1724,7 +1746,7 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args,
spin_lock(&fi->lock);
rb_erase(&wpa->writepages_entry, &fi->writepages);
while (wpa->next) {
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_write_in *inarg = &wpa->ia.write.in;
struct fuse_writepage_args *next = wpa->next;
@@ -1756,10 +1778,10 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args,
* no invocations of fuse_writepage_end() while we're in
* fuse_set_nowrite..fuse_release_nowrite section.
*/
- fuse_send_writepage(fc, next, inarg->offset + inarg->size);
+ fuse_send_writepage(fm, next, inarg->offset + inarg->size);
}
fi->writectr--;
- fuse_writepage_finish(fc, wpa);
+ fuse_writepage_finish(fm, wpa);
spin_unlock(&fi->lock);
fuse_writepage_free(wpa);
}
@@ -2317,6 +2339,10 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct fuse_file *ff = file->private_data;
+ /* DAX mmap is superior to direct_io mmap */
+ if (FUSE_IS_DAX(file_inode(file)))
+ return fuse_dax_mmap(file, vma);
+
if (ff->open_flags & FOPEN_DIRECT_IO) {
/* Can't provide the coherency needed for MAP_SHARED */
if (vma->vm_flags & VM_MAYSHARE)
@@ -2395,7 +2421,7 @@ static void fuse_lk_fill(struct fuse_args *args, struct file *file,
static int fuse_getlk(struct file *file, struct file_lock *fl)
{
struct inode *inode = file_inode(file);
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
struct fuse_lk_in inarg;
struct fuse_lk_out outarg;
@@ -2405,9 +2431,9 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
args.out_numargs = 1;
args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg;
- err = fuse_simple_request(fc, &args);
+ err = fuse_simple_request(fm, &args);
if (!err)
- err = convert_fuse_file_lock(fc, &outarg.lk, fl);
+ err = convert_fuse_file_lock(fm->fc, &outarg.lk, fl);
return err;
}
@@ -2415,12 +2441,12 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
{
struct inode *inode = file_inode(file);
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
struct fuse_lk_in inarg;
int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK;
struct pid *pid = fl->fl_type != F_UNLCK ? task_tgid(current) : NULL;
- pid_t pid_nr = pid_nr_ns(pid, fc->pid_ns);
+ pid_t pid_nr = pid_nr_ns(pid, fm->fc->pid_ns);
int err;
if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
@@ -2433,7 +2459,7 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
return 0;
fuse_lk_fill(&args, file, fl, opcode, pid_nr, flock, &inarg);
- err = fuse_simple_request(fc, &args);
+ err = fuse_simple_request(fm, &args);
/* locking is restartable */
if (err == -EINTR)
@@ -2487,13 +2513,13 @@ static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl)
static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
{
struct inode *inode = mapping->host;
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
struct fuse_bmap_in inarg;
struct fuse_bmap_out outarg;
int err;
- if (!inode->i_sb->s_bdev || fc->no_bmap)
+ if (!inode->i_sb->s_bdev || fm->fc->no_bmap)
return 0;
memset(&inarg, 0, sizeof(inarg));
@@ -2507,9 +2533,9 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
args.out_numargs = 1;
args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg;
- err = fuse_simple_request(fc, &args);
+ err = fuse_simple_request(fm, &args);
if (err == -ENOSYS)
- fc->no_bmap = 1;
+ fm->fc->no_bmap = 1;
return err ? 0 : outarg.block;
}
@@ -2517,7 +2543,7 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
static loff_t fuse_lseek(struct file *file, loff_t offset, int whence)
{
struct inode *inode = file->f_mapping->host;
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_file *ff = file->private_data;
FUSE_ARGS(args);
struct fuse_lseek_in inarg = {
@@ -2528,7 +2554,7 @@ static loff_t fuse_lseek(struct file *file, loff_t offset, int whence)
struct fuse_lseek_out outarg;
int err;
- if (fc->no_lseek)
+ if (fm->fc->no_lseek)
goto fallback;
args.opcode = FUSE_LSEEK;
@@ -2539,10 +2565,10 @@ static loff_t fuse_lseek(struct file *file, loff_t offset, int whence)
args.out_numargs = 1;
args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg;
- err = fuse_simple_request(fc, &args);
+ err = fuse_simple_request(fm, &args);
if (err) {
if (err == -ENOSYS) {
- fc->no_lseek = 1;
+ fm->fc->no_lseek = 1;
goto fallback;
}
return err;
@@ -2728,7 +2754,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
unsigned int flags)
{
struct fuse_file *ff = file->private_data;
- struct fuse_conn *fc = ff->fc;
+ struct fuse_mount *fm = ff->fm;
struct fuse_ioctl_in inarg = {
.fh = ff->fh,
.cmd = cmd,
@@ -2761,12 +2787,12 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
err = -ENOMEM;
- ap.pages = fuse_pages_alloc(fc->max_pages, GFP_KERNEL, &ap.descs);
+ ap.pages = fuse_pages_alloc(fm->fc->max_pages, GFP_KERNEL, &ap.descs);
iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
if (!ap.pages || !iov_page)
goto out;
- fuse_page_descs_length_init(ap.descs, 0, fc->max_pages);
+ fuse_page_descs_length_init(ap.descs, 0, fm->fc->max_pages);
/*
* If restricted, initialize IO parameters as encoded in @cmd.
@@ -2811,7 +2837,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
/* make sure there are enough buffer pages and init request with them */
err = -ENOMEM;
- if (max_pages > fc->max_pages)
+ if (max_pages > fm->fc->max_pages)
goto out;
while (ap.num_pages < max_pages) {
ap.pages[ap.num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
@@ -2848,7 +2874,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
ap.args.out_pages = true;
ap.args.out_argvar = true;
- transferred = fuse_simple_request(fc, &ap.args);
+ transferred = fuse_simple_request(fm, &ap.args);
err = transferred;
if (transferred < 0)
goto out;
@@ -2876,7 +2902,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
goto out;
vaddr = kmap_atomic(ap.pages[0]);
- err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr,
+ err = fuse_copy_ioctl_iovec(fm->fc, iov_page, vaddr,
transferred, in_iovs + out_iovs,
(flags & FUSE_IOCTL_COMPAT) != 0);
kunmap_atomic(vaddr);
@@ -2886,11 +2912,11 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
in_iov = iov_page;
out_iov = in_iov + in_iovs;
- err = fuse_verify_ioctl_iov(fc, in_iov, in_iovs);
+ err = fuse_verify_ioctl_iov(fm->fc, in_iov, in_iovs);
if (err)
goto out;
- err = fuse_verify_ioctl_iov(fc, out_iov, out_iovs);
+ err = fuse_verify_ioctl_iov(fm->fc, out_iov, out_iovs);
if (err)
goto out;
@@ -3000,13 +3026,13 @@ static void fuse_register_polled_file(struct fuse_conn *fc,
__poll_t fuse_file_poll(struct file *file, poll_table *wait)
{
struct fuse_file *ff = file->private_data;
- struct fuse_conn *fc = ff->fc;
+ struct fuse_mount *fm = ff->fm;
struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh };
struct fuse_poll_out outarg;
FUSE_ARGS(args);
int err;
- if (fc->no_poll)
+ if (fm->fc->no_poll)
return DEFAULT_POLLMASK;
poll_wait(file, &ff->poll_wait, wait);
@@ -3018,7 +3044,7 @@ __poll_t fuse_file_poll(struct file *file, poll_table *wait)
*/
if (waitqueue_active(&ff->poll_wait)) {
inarg.flags |= FUSE_POLL_SCHEDULE_NOTIFY;
- fuse_register_polled_file(fc, ff);
+ fuse_register_polled_file(fm->fc, ff);
}
args.opcode = FUSE_POLL;
@@ -3029,12 +3055,12 @@ __poll_t fuse_file_poll(struct file *file, poll_table *wait)
args.out_numargs = 1;
args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg;
- err = fuse_simple_request(fc, &args);
+ err = fuse_simple_request(fm, &args);
if (!err)
return demangle_poll(outarg.revents);
if (err == -ENOSYS) {
- fc->no_poll = 1;
+ fm->fc->no_poll = 1;
return DEFAULT_POLLMASK;
}
return EPOLLERR;
@@ -3120,13 +3146,13 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
* By default, we want to optimize all I/Os with async request
* submission to the client filesystem if supported.
*/
- io->async = ff->fc->async_dio;
+ io->async = ff->fm->fc->async_dio;
io->iocb = iocb;
io->blocking = is_sync_kiocb(iocb);
/* optimization for short read */
if (io->async && !io->write && offset + count > i_size) {
- iov_iter_truncate(iter, fuse_round_up(ff->fc, i_size - offset));
+ iov_iter_truncate(iter, fuse_round_up(ff->fm->fc, i_size - offset));
shortened = count - iov_iter_count(iter);
count -= shortened;
}
@@ -3196,7 +3222,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
struct fuse_file *ff = file->private_data;
struct inode *inode = file_inode(file);
struct fuse_inode *fi = get_fuse_inode(inode);
- struct fuse_conn *fc = ff->fc;
+ struct fuse_mount *fm = ff->fm;
FUSE_ARGS(args);
struct fuse_fallocate_in inarg = {
.fh = ff->fh,
@@ -3208,14 +3234,23 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) ||
(mode & FALLOC_FL_PUNCH_HOLE);
+ bool block_faults = FUSE_IS_DAX(inode) && lock_inode;
+
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return -EOPNOTSUPP;
- if (fc->no_fallocate)
+ if (fm->fc->no_fallocate)
return -EOPNOTSUPP;
if (lock_inode) {
inode_lock(inode);
+ if (block_faults) {
+ down_write(&fi->i_mmap_sem);
+ err = fuse_dax_break_layouts(inode, 0, 0);
+ if (err)
+ goto out;
+ }
+
if (mode & FALLOC_FL_PUNCH_HOLE) {
loff_t endbyte = offset + length - 1;
@@ -3240,9 +3275,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
args.in_numargs = 1;
args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg;
- err = fuse_simple_request(fc, &args);
+ err = fuse_simple_request(fm, &args);
if (err == -ENOSYS) {
- fc->no_fallocate = 1;
+ fm->fc->no_fallocate = 1;
err = -EOPNOTSUPP;
}
if (err)
@@ -3252,7 +3287,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
if (!(mode & FALLOC_FL_KEEP_SIZE)) {
bool changed = fuse_write_update_size(inode, offset + length);
- if (changed && fc->writeback_cache)
+ if (changed && fm->fc->writeback_cache)
file_update_time(file);
}
@@ -3265,6 +3300,9 @@ out:
if (!(mode & FALLOC_FL_KEEP_SIZE))
clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
+ if (block_faults)
+ up_write(&fi->i_mmap_sem);
+
if (lock_inode)
inode_unlock(inode);
@@ -3280,7 +3318,8 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
struct inode *inode_in = file_inode(file_in);
struct inode *inode_out = file_inode(file_out);
struct fuse_inode *fi_out = get_fuse_inode(inode_out);
- struct fuse_conn *fc = ff_in->fc;
+ struct fuse_mount *fm = ff_in->fm;
+ struct fuse_conn *fc = fm->fc;
FUSE_ARGS(args);
struct fuse_copy_file_range_in inarg = {
.fh_in = ff_in->fh,
@@ -3349,7 +3388,7 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
args.out_numargs = 1;
args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg;
- err = fuse_simple_request(fc, &args);
+ err = fuse_simple_request(fm, &args);
if (err == -ENOSYS) {
fc->no_copy_file_range = 1;
err = -EOPNOTSUPP;
@@ -3404,6 +3443,7 @@ static const struct file_operations fuse_file_operations = {
.release = fuse_release,
.fsync = fuse_fsync,
.lock = fuse_file_lock,
+ .get_unmapped_area = thp_get_unmapped_area,
.flock = fuse_file_flock,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
@@ -3439,4 +3479,7 @@ void fuse_init_file_inode(struct inode *inode)
fi->writectr = 0;
init_waitqueue_head(&fi->page_waitq);
fi->writepages = RB_ROOT;
+
+ if (IS_ENABLED(CONFIG_FUSE_DAX))
+ fuse_dax_inode_init(inode);
}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 740a8a7d7ae6..d51598017d13 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -148,6 +148,20 @@ struct fuse_inode {
/** Lock to protect write related fields */
spinlock_t lock;
+
+ /**
+ * Can't take inode lock in fault path (leads to circular dependency).
+ * Introduce another semaphore which can be taken in fault path and
+ * then other filesystem paths can take this to block faults.
+ */
+ struct rw_semaphore i_mmap_sem;
+
+#ifdef CONFIG_FUSE_DAX
+ /*
+ * Dax specific inode data
+ */
+ struct fuse_inode_dax *dax;
+#endif
};
/** FUSE inode state bits */
@@ -161,12 +175,13 @@ enum {
};
struct fuse_conn;
+struct fuse_mount;
struct fuse_release_args;
/** FUSE specific file data */
struct fuse_file {
/** Fuse connection for this file */
- struct fuse_conn *fc;
+ struct fuse_mount *fm;
/* Argument space reserved for release */
struct fuse_release_args *release_args;
@@ -252,7 +267,7 @@ struct fuse_args {
bool may_block:1;
struct fuse_in_arg in_args[3];
struct fuse_arg out_args[2];
- void (*end)(struct fuse_conn *fc, struct fuse_args *args, int error);
+ void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error);
};
struct fuse_args_pages {
@@ -360,6 +375,9 @@ struct fuse_req {
/** virtio-fs's physically contiguous buffer for in and out args */
void *argbuf;
#endif
+
+ /** fuse_mount this request belongs to */
+ struct fuse_mount *fm;
};
struct fuse_iqueue;
@@ -482,11 +500,15 @@ struct fuse_fs_context {
bool destroy:1;
bool no_control:1;
bool no_force_umount:1;
- bool no_mount_options:1;
+ bool legacy_opts_show:1;
+ bool dax:1;
unsigned int max_read;
unsigned int blksize;
const char *subtype;
+ /* DAX device, may be NULL */
+ struct dax_device *dax_dev;
+
/* fuse_dev pointer to fill in, should contain NULL on entry */
void **fudptr;
};
@@ -494,9 +516,9 @@ struct fuse_fs_context {
/**
* A Fuse connection.
*
- * This structure is created, when the filesystem is mounted, and is
- * destroyed, when the client device is closed and the filesystem is
- * unmounted.
+ * This structure is created, when the root filesystem is mounted, and
+ * is destroyed, when the client device is closed and the last
+ * fuse_mount is destroyed.
*/
struct fuse_conn {
/** Lock protecting accessess to members of this structure */
@@ -610,6 +632,9 @@ struct fuse_conn {
/** cache READLINK responses in page cache */
unsigned cache_symlinks:1;
+ /* show legacy mount options */
+ unsigned int legacy_opts_show:1;
+
/*
* The following bitfields are only for optimization purposes
* and hence races in setting them will not cause malfunction
@@ -717,8 +742,8 @@ struct fuse_conn {
/** Do not allow MNT_FORCE umount */
unsigned int no_force_umount:1;
- /* Do not show mount options */
- unsigned int no_mount_options:1;
+ /* Auto-mount submounts announced by the server */
+ unsigned int auto_submounts:1;
/** The number of requests waiting for completion */
atomic_t num_waiting;
@@ -726,10 +751,10 @@ struct fuse_conn {
/** Negotiated minor version */
unsigned minor;
- /** Entry on the fuse_conn_list */
+ /** Entry on the fuse_mount_list */
struct list_head entry;
- /** Device ID from super block */
+ /** Device ID from the root super block */
dev_t dev;
/** Dentries in the control filesystem */
@@ -747,24 +772,70 @@ struct fuse_conn {
/** Called on final put */
void (*release)(struct fuse_conn *);
- /** Super block for this connection. */
- struct super_block *sb;
-
- /** Read/write semaphore to hold when accessing sb. */
+ /**
+ * Read/write semaphore to hold when accessing the sb of any
+ * fuse_mount belonging to this connection
+ */
struct rw_semaphore killsb;
/** List of device instances belonging to this connection */
struct list_head devices;
+
+#ifdef CONFIG_FUSE_DAX
+ /* Dax specific conn data, non-NULL if DAX is enabled */
+ struct fuse_conn_dax *dax;
+#endif
+
+ /** List of filesystems using this connection */
+ struct list_head mounts;
};
-static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
+/*
+ * Represents a mounted filesystem, potentially a submount.
+ *
+ * This object allows sharing a fuse_conn between separate mounts to
+ * allow submounts with dedicated superblocks and thus separate device
+ * IDs.
+ */
+struct fuse_mount {
+ /* Underlying (potentially shared) connection to the FUSE server */
+ struct fuse_conn *fc;
+
+ /* Refcount */
+ refcount_t count;
+
+ /*
+ * Super block for this connection (fc->killsb must be held when
+ * accessing this).
+ */
+ struct super_block *sb;
+
+ /* Entry on fc->mounts */
+ struct list_head fc_entry;
+};
+
+static inline struct fuse_mount *get_fuse_mount_super(struct super_block *sb)
{
return sb->s_fs_info;
}
+static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
+{
+ struct fuse_mount *fm = get_fuse_mount_super(sb);
+
+ return fm ? fm->fc : NULL;
+}
+
+static inline struct fuse_mount *get_fuse_mount(struct inode *inode)
+{
+ return get_fuse_mount_super(inode->i_sb);
+}
+
static inline struct fuse_conn *get_fuse_conn(struct inode *inode)
{
- return get_fuse_conn_super(inode->i_sb);
+ struct fuse_mount *fm = get_fuse_mount(inode);
+
+ return fm ? fm->fc : NULL;
}
static inline struct fuse_inode *get_fuse_inode(struct inode *inode)
@@ -794,11 +865,6 @@ extern const struct dentry_operations fuse_dentry_operations;
extern const struct dentry_operations fuse_root_dentry_operations;
/**
- * Inode to nodeid comparison.
- */
-int fuse_inode_eq(struct inode *inode, void *_nodeidp);
-
-/**
* Get a filled in inode
*/
struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
@@ -848,7 +914,7 @@ void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
*/
int fuse_open_common(struct inode *inode, struct file *file, bool isdir);
-struct fuse_file *fuse_file_alloc(struct fuse_conn *fc);
+struct fuse_file *fuse_file_alloc(struct fuse_mount *fm);
void fuse_file_free(struct fuse_file *ff);
void fuse_finish_open(struct inode *inode, struct file *file);
@@ -916,14 +982,14 @@ void __exit fuse_ctl_cleanup(void);
/**
* Simple request sending that does request allocation and freeing
*/
-ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args);
-int fuse_simple_background(struct fuse_conn *fc, struct fuse_args *args,
+ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args);
+int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args,
gfp_t gfp_flags);
/**
* End a finished request
*/
-void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req);
+void fuse_request_end(struct fuse_req *req);
/* Abort all requests */
void fuse_abort_conn(struct fuse_conn *fc);
@@ -949,7 +1015,8 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
/**
* Initialize fuse_conn
*/
-void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns,
+void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
+ struct user_namespace *user_ns,
const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv);
/**
@@ -957,11 +1024,21 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns,
*/
void fuse_conn_put(struct fuse_conn *fc);
+/**
+ * Acquire reference to fuse_mount
+ */
+struct fuse_mount *fuse_mount_get(struct fuse_mount *fm);
+
+/**
+ * Release reference to fuse_mount
+ */
+void fuse_mount_put(struct fuse_mount *fm);
+
struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc);
struct fuse_dev *fuse_dev_alloc(void);
void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc);
void fuse_dev_free(struct fuse_dev *fud);
-void fuse_send_init(struct fuse_conn *fc);
+void fuse_send_init(struct fuse_mount *fm);
/**
* Fill in superblock and initialize fuse connection
@@ -970,12 +1047,26 @@ void fuse_send_init(struct fuse_conn *fc);
*/
int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx);
-/**
- * Disassociate fuse connection from superblock and kill the superblock
+/*
+ * Fill in superblock for submounts
+ * @sb: partially-initialized superblock to fill in
+ * @parent_fi: The fuse_inode of the parent filesystem where this submount is
+ * mounted
+ */
+int fuse_fill_super_submount(struct super_block *sb,
+ struct fuse_inode *parent_fi);
+
+/*
+ * Remove the mount from the connection
*
- * Calls kill_anon_super(), do not use with bdev mounts.
+ * Returns whether this was the last mount
*/
-void fuse_kill_sb_anon(struct super_block *sb);
+bool fuse_mount_remove(struct fuse_mount *fm);
+
+/*
+ * Shut down the connection (possibly sending DESTROY request).
+ */
+void fuse_conn_destroy(struct fuse_mount *fm);
/**
* Add connection to control filesystem
@@ -1011,9 +1102,19 @@ void fuse_set_nowrite(struct inode *inode);
void fuse_release_nowrite(struct inode *inode);
/**
+ * Scan all fuse_mounts belonging to fc to find the first where
+ * ilookup5() returns a result. Return that result and the
+ * respective fuse_mount in *fm (unless fm is NULL).
+ *
+ * The caller must hold fc->killsb.
+ */
+struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid,
+ struct fuse_mount **fm);
+
+/**
* File-system tells the kernel to invalidate cache for the given node id.
*/
-int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
+int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid,
loff_t offset, loff_t len);
/**
@@ -1026,10 +1127,10 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
* - is a file or oan empty directory
* then the dentry is unhashed (d_delete()).
*/
-int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
+int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
u64 child_nodeid, struct qstr *name);
-int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
+int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
bool isdir);
/**
@@ -1093,4 +1194,20 @@ unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args);
u64 fuse_get_unique(struct fuse_iqueue *fiq);
void fuse_free_conn(struct fuse_conn *fc);
+/* dax.c */
+
+#define FUSE_IS_DAX(inode) (IS_ENABLED(CONFIG_FUSE_DAX) && IS_DAX(inode))
+
+ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to);
+ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from);
+int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma);
+int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start, u64 dmap_end);
+int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev);
+void fuse_dax_conn_free(struct fuse_conn *fc);
+bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi);
+void fuse_dax_inode_init(struct inode *inode);
+void fuse_dax_inode_cleanup(struct inode *inode);
+bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment);
+void fuse_dax_cancel_work(struct fuse_conn *fc);
+
#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 581329203d68..1a47afc95f80 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -85,14 +85,22 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
fi->orig_ino = 0;
fi->state = 0;
mutex_init(&fi->mutex);
+ init_rwsem(&fi->i_mmap_sem);
spin_lock_init(&fi->lock);
fi->forget = fuse_alloc_forget();
- if (!fi->forget) {
- kmem_cache_free(fuse_inode_cachep, fi);
- return NULL;
- }
+ if (!fi->forget)
+ goto out_free;
+
+ if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi))
+ goto out_free_forget;
return &fi->inode;
+
+out_free_forget:
+ kfree(fi->forget);
+out_free:
+ kmem_cache_free(fuse_inode_cachep, fi);
+ return NULL;
}
static void fuse_free_inode(struct inode *inode)
@@ -101,6 +109,9 @@ static void fuse_free_inode(struct inode *inode)
mutex_destroy(&fi->mutex);
kfree(fi->forget);
+#ifdef CONFIG_FUSE_DAX
+ kfree(fi->dax);
+#endif
kmem_cache_free(fuse_inode_cachep, fi);
}
@@ -112,8 +123,14 @@ static void fuse_evict_inode(struct inode *inode)
clear_inode(inode);
if (inode->i_sb->s_flags & SB_ACTIVE) {
struct fuse_conn *fc = get_fuse_conn(inode);
- fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup);
- fi->forget = NULL;
+
+ if (FUSE_IS_DAX(inode))
+ fuse_dax_inode_cleanup(inode);
+ if (fi->nlookup) {
+ fuse_queue_forget(fc, fi->forget, fi->nodeid,
+ fi->nlookup);
+ fi->forget = NULL;
+ }
}
if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) {
WARN_ON(!list_empty(&fi->write_files));
@@ -268,7 +285,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
BUG();
}
-int fuse_inode_eq(struct inode *inode, void *_nodeidp)
+static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
{
u64 nodeid = *(u64 *) _nodeidp;
if (get_node_id(inode) == nodeid)
@@ -292,7 +309,26 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
struct fuse_inode *fi;
struct fuse_conn *fc = get_fuse_conn_super(sb);
- retry:
+ /*
+ * Auto mount points get their node id from the submount root, which is
+ * not a unique identifier within this filesystem.
+ *
+ * To avoid conflicts, do not place submount points into the inode hash
+ * table.
+ */
+ if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) &&
+ S_ISDIR(attr->mode)) {
+ inode = new_inode(sb);
+ if (!inode)
+ return NULL;
+
+ fuse_init_inode(inode, attr);
+ get_fuse_inode(inode)->nodeid = nodeid;
+ inode->i_flags |= S_AUTOMOUNT;
+ goto done;
+ }
+
+retry:
inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid);
if (!inode)
return NULL;
@@ -310,7 +346,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
iput(inode);
goto retry;
}
-
+done:
fi = get_fuse_inode(inode);
spin_lock(&fi->lock);
fi->nlookup++;
@@ -320,16 +356,37 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
return inode;
}
-int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
+struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid,
+ struct fuse_mount **fm)
+{
+ struct fuse_mount *fm_iter;
+ struct inode *inode;
+
+ WARN_ON(!rwsem_is_locked(&fc->killsb));
+ list_for_each_entry(fm_iter, &fc->mounts, fc_entry) {
+ if (!fm_iter->sb)
+ continue;
+
+ inode = ilookup5(fm_iter->sb, nodeid, fuse_inode_eq, &nodeid);
+ if (inode) {
+ if (fm)
+ *fm = fm_iter;
+ return inode;
+ }
+ }
+
+ return NULL;
+}
+
+int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid,
loff_t offset, loff_t len)
{
- struct fuse_conn *fc = get_fuse_conn_super(sb);
struct fuse_inode *fi;
struct inode *inode;
pgoff_t pg_start;
pgoff_t pg_end;
- inode = ilookup5(sb, nodeid, fuse_inode_eq, &nodeid);
+ inode = fuse_ilookup(fc, nodeid, NULL);
if (!inode)
return -ENOENT;
@@ -379,28 +436,23 @@ static void fuse_umount_begin(struct super_block *sb)
fuse_abort_conn(fc);
}
-static void fuse_send_destroy(struct fuse_conn *fc)
+static void fuse_send_destroy(struct fuse_mount *fm)
{
- if (fc->conn_init) {
+ if (fm->fc->conn_init) {
FUSE_ARGS(args);
args.opcode = FUSE_DESTROY;
args.force = true;
args.nocreds = true;
- fuse_simple_request(fc, &args);
+ fuse_simple_request(fm, &args);
}
}
static void fuse_put_super(struct super_block *sb)
{
- struct fuse_conn *fc = get_fuse_conn_super(sb);
+ struct fuse_mount *fm = get_fuse_mount_super(sb);
- mutex_lock(&fuse_mutex);
- list_del(&fc->entry);
- fuse_ctl_remove_conn(fc);
- mutex_unlock(&fuse_mutex);
-
- fuse_conn_put(fc);
+ fuse_mount_put(fm);
}
static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
@@ -420,12 +472,12 @@ static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr
static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
- struct fuse_conn *fc = get_fuse_conn_super(sb);
+ struct fuse_mount *fm = get_fuse_mount_super(sb);
FUSE_ARGS(args);
struct fuse_statfs_out outarg;
int err;
- if (!fuse_allow_current_process(fc)) {
+ if (!fuse_allow_current_process(fm->fc)) {
buf->f_type = FUSE_SUPER_MAGIC;
return 0;
}
@@ -437,7 +489,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
args.out_numargs = 1;
args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg;
- err = fuse_simple_request(fc, &args);
+ err = fuse_simple_request(fm, &args);
if (!err)
convert_fuse_statfs(buf, &outarg.st);
return err;
@@ -573,19 +625,25 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
struct super_block *sb = root->d_sb;
struct fuse_conn *fc = get_fuse_conn_super(sb);
- if (fc->no_mount_options)
- return 0;
+ if (fc->legacy_opts_show) {
+ seq_printf(m, ",user_id=%u",
+ from_kuid_munged(fc->user_ns, fc->user_id));
+ seq_printf(m, ",group_id=%u",
+ from_kgid_munged(fc->user_ns, fc->group_id));
+ if (fc->default_permissions)
+ seq_puts(m, ",default_permissions");
+ if (fc->allow_other)
+ seq_puts(m, ",allow_other");
+ if (fc->max_read != ~0)
+ seq_printf(m, ",max_read=%u", fc->max_read);
+ if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
+ seq_printf(m, ",blksize=%lu", sb->s_blocksize);
+ }
+#ifdef CONFIG_FUSE_DAX
+ if (fc->dax)
+ seq_puts(m, ",dax");
+#endif
- seq_printf(m, ",user_id=%u", from_kuid_munged(fc->user_ns, fc->user_id));
- seq_printf(m, ",group_id=%u", from_kgid_munged(fc->user_ns, fc->group_id));
- if (fc->default_permissions)
- seq_puts(m, ",default_permissions");
- if (fc->allow_other)
- seq_puts(m, ",allow_other");
- if (fc->max_read != ~0)
- seq_printf(m, ",max_read=%u", fc->max_read);
- if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
- seq_printf(m, ",blksize=%lu", sb->s_blocksize);
return 0;
}
@@ -615,7 +673,8 @@ static void fuse_pqueue_init(struct fuse_pqueue *fpq)
fpq->connected = 1;
}
-void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns,
+void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
+ struct user_namespace *user_ns,
const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv)
{
memset(fc, 0, sizeof(*fc));
@@ -642,6 +701,11 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns,
fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
fc->user_ns = get_user_ns(user_ns);
fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
+
+ INIT_LIST_HEAD(&fc->mounts);
+ list_add(&fm->fc_entry, &fc->mounts);
+ fm->fc = fc;
+ refcount_set(&fm->count, 1);
}
EXPORT_SYMBOL_GPL(fuse_conn_init);
@@ -650,6 +714,8 @@ void fuse_conn_put(struct fuse_conn *fc)
if (refcount_dec_and_test(&fc->count)) {
struct fuse_iqueue *fiq = &fc->iq;
+ if (IS_ENABLED(CONFIG_FUSE_DAX))
+ fuse_dax_conn_free(fc);
if (fiq->ops->release)
fiq->ops->release(fiq);
put_pid_ns(fc->pid_ns);
@@ -666,6 +732,23 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
}
EXPORT_SYMBOL_GPL(fuse_conn_get);
+void fuse_mount_put(struct fuse_mount *fm)
+{
+ if (refcount_dec_and_test(&fm->count)) {
+ if (fm->fc)
+ fuse_conn_put(fm->fc);
+ kfree(fm);
+ }
+}
+EXPORT_SYMBOL_GPL(fuse_mount_put);
+
+struct fuse_mount *fuse_mount_get(struct fuse_mount *fm)
+{
+ refcount_inc(&fm->count);
+ return fm;
+}
+EXPORT_SYMBOL_GPL(fuse_mount_get);
+
static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
{
struct fuse_attr attr;
@@ -895,14 +978,16 @@ struct fuse_init_args {
struct fuse_init_out out;
};
-static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args,
+static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
int error)
{
+ struct fuse_conn *fc = fm->fc;
struct fuse_init_args *ia = container_of(args, typeof(*ia), args);
struct fuse_init_out *arg = &ia->out;
+ bool ok = true;
if (error || arg->major != FUSE_KERNEL_VERSION)
- fc->conn_error = 1;
+ ok = false;
else {
unsigned long ra_pages;
@@ -950,11 +1035,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args,
if (arg->flags & FUSE_HANDLE_KILLPRIV)
fc->handle_killpriv = 1;
if (arg->time_gran && arg->time_gran <= 1000000000)
- fc->sb->s_time_gran = arg->time_gran;
+ fm->sb->s_time_gran = arg->time_gran;
if ((arg->flags & FUSE_POSIX_ACL)) {
fc->default_permissions = 1;
fc->posix_acl = 1;
- fc->sb->s_xattr = fuse_acl_xattr_handlers;
+ fm->sb->s_xattr = fuse_acl_xattr_handlers;
}
if (arg->flags & FUSE_CACHE_SYMLINKS)
fc->cache_symlinks = 1;
@@ -965,14 +1050,19 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args,
min_t(unsigned int, FUSE_MAX_MAX_PAGES,
max_t(unsigned int, arg->max_pages, 1));
}
+ if (IS_ENABLED(CONFIG_FUSE_DAX) &&
+ arg->flags & FUSE_MAP_ALIGNMENT &&
+ !fuse_dax_check_alignment(fc, arg->map_alignment)) {
+ ok = false;
+ }
} else {
ra_pages = fc->max_read / PAGE_SIZE;
fc->no_lock = 1;
fc->no_flock = 1;
}
- fc->sb->s_bdi->ra_pages =
- min(fc->sb->s_bdi->ra_pages, ra_pages);
+ fm->sb->s_bdi->ra_pages =
+ min(fm->sb->s_bdi->ra_pages, ra_pages);
fc->minor = arg->minor;
fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
fc->max_write = max_t(unsigned, 4096, fc->max_write);
@@ -980,11 +1070,16 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args,
}
kfree(ia);
+ if (!ok) {
+ fc->conn_init = 0;
+ fc->conn_error = 1;
+ }
+
fuse_set_initialized(fc);
wake_up_all(&fc->blocked_waitq);
}
-void fuse_send_init(struct fuse_conn *fc)
+void fuse_send_init(struct fuse_mount *fm)
{
struct fuse_init_args *ia;
@@ -992,7 +1087,7 @@ void fuse_send_init(struct fuse_conn *fc)
ia->in.major = FUSE_KERNEL_VERSION;
ia->in.minor = FUSE_KERNEL_MINOR_VERSION;
- ia->in.max_readahead = fc->sb->s_bdi->ra_pages * PAGE_SIZE;
+ ia->in.max_readahead = fm->sb->s_bdi->ra_pages * PAGE_SIZE;
ia->in.flags |=
FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
@@ -1003,6 +1098,13 @@ void fuse_send_init(struct fuse_conn *fc)
FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA;
+#ifdef CONFIG_FUSE_DAX
+ if (fm->fc->dax)
+ ia->in.flags |= FUSE_MAP_ALIGNMENT;
+#endif
+ if (fm->fc->auto_submounts)
+ ia->in.flags |= FUSE_SUBMOUNTS;
+
ia->args.opcode = FUSE_INIT;
ia->args.in_numargs = 1;
ia->args.in_args[0].size = sizeof(ia->in);
@@ -1018,8 +1120,8 @@ void fuse_send_init(struct fuse_conn *fc)
ia->args.nocreds = true;
ia->args.end = process_init_reply;
- if (fuse_simple_background(fc, &ia->args, GFP_KERNEL) != 0)
- process_init_reply(fc, &ia->args, -ENOTCONN);
+ if (fuse_simple_background(fm, &ia->args, GFP_KERNEL) != 0)
+ process_init_reply(fm, &ia->args, -ENOTCONN);
}
EXPORT_SYMBOL_GPL(fuse_send_init);
@@ -1130,10 +1232,92 @@ void fuse_dev_free(struct fuse_dev *fud)
}
EXPORT_SYMBOL_GPL(fuse_dev_free);
+static void fuse_fill_attr_from_inode(struct fuse_attr *attr,
+ const struct fuse_inode *fi)
+{
+ *attr = (struct fuse_attr){
+ .ino = fi->inode.i_ino,
+ .size = fi->inode.i_size,
+ .blocks = fi->inode.i_blocks,
+ .atime = fi->inode.i_atime.tv_sec,
+ .mtime = fi->inode.i_mtime.tv_sec,
+ .ctime = fi->inode.i_ctime.tv_sec,
+ .atimensec = fi->inode.i_atime.tv_nsec,
+ .mtimensec = fi->inode.i_mtime.tv_nsec,
+ .ctimensec = fi->inode.i_ctime.tv_nsec,
+ .mode = fi->inode.i_mode,
+ .nlink = fi->inode.i_nlink,
+ .uid = fi->inode.i_uid.val,
+ .gid = fi->inode.i_gid.val,
+ .rdev = fi->inode.i_rdev,
+ .blksize = 1u << fi->inode.i_blkbits,
+ };
+}
+
+static void fuse_sb_defaults(struct super_block *sb)
+{
+ sb->s_magic = FUSE_SUPER_MAGIC;
+ sb->s_op = &fuse_super_operations;
+ sb->s_xattr = fuse_xattr_handlers;
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
+ sb->s_time_gran = 1;
+ sb->s_export_op = &fuse_export_operations;
+ sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
+ if (sb->s_user_ns != &init_user_ns)
+ sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
+ sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
+
+ /*
+ * If we are not in the initial user namespace posix
+ * acls must be translated.
+ */
+ if (sb->s_user_ns != &init_user_ns)
+ sb->s_xattr = fuse_no_acl_xattr_handlers;
+}
+
+int fuse_fill_super_submount(struct super_block *sb,
+ struct fuse_inode *parent_fi)
+{
+ struct fuse_mount *fm = get_fuse_mount_super(sb);
+ struct super_block *parent_sb = parent_fi->inode.i_sb;
+ struct fuse_attr root_attr;
+ struct inode *root;
+
+ fuse_sb_defaults(sb);
+ fm->sb = sb;
+
+ WARN_ON(sb->s_bdi != &noop_backing_dev_info);
+ sb->s_bdi = bdi_get(parent_sb->s_bdi);
+
+ sb->s_xattr = parent_sb->s_xattr;
+ sb->s_time_gran = parent_sb->s_time_gran;
+ sb->s_blocksize = parent_sb->s_blocksize;
+ sb->s_blocksize_bits = parent_sb->s_blocksize_bits;
+ sb->s_subtype = kstrdup(parent_sb->s_subtype, GFP_KERNEL);
+ if (parent_sb->s_subtype && !sb->s_subtype)
+ return -ENOMEM;
+
+ fuse_fill_attr_from_inode(&root_attr, parent_fi);
+ root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0);
+ /*
+ * This inode is just a duplicate, so it is not looked up and
+ * its nlookup should not be incremented. fuse_iget() does
+ * that, though, so undo it here.
+ */
+ get_fuse_inode(root)->nlookup--;
+ sb->s_d_op = &fuse_dentry_operations;
+ sb->s_root = d_make_root(root);
+ if (!sb->s_root)
+ return -ENOMEM;
+
+ return 0;
+}
+
int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
{
struct fuse_dev *fud = NULL;
- struct fuse_conn *fc = get_fuse_conn_super(sb);
+ struct fuse_mount *fm = get_fuse_mount_super(sb);
+ struct fuse_conn *fc = fm->fc;
struct inode *root;
struct dentry *root_dentry;
int err;
@@ -1142,7 +1326,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
if (sb->s_flags & SB_MANDLOCK)
goto err;
- sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
+ fuse_sb_defaults(sb);
if (ctx->is_bdev) {
#ifdef CONFIG_BLOCK
@@ -1157,32 +1341,21 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
sb->s_subtype = ctx->subtype;
ctx->subtype = NULL;
- sb->s_magic = FUSE_SUPER_MAGIC;
- sb->s_op = &fuse_super_operations;
- sb->s_xattr = fuse_xattr_handlers;
- sb->s_maxbytes = MAX_LFS_FILESIZE;
- sb->s_time_gran = 1;
- sb->s_export_op = &fuse_export_operations;
- sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
- if (sb->s_user_ns != &init_user_ns)
- sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
-
- /*
- * If we are not in the initial user namespace posix
- * acls must be translated.
- */
- if (sb->s_user_ns != &init_user_ns)
- sb->s_xattr = fuse_no_acl_xattr_handlers;
+ if (IS_ENABLED(CONFIG_FUSE_DAX)) {
+ err = fuse_dax_conn_alloc(fc, ctx->dax_dev);
+ if (err)
+ goto err;
+ }
if (ctx->fudptr) {
err = -ENOMEM;
fud = fuse_dev_alloc_install(fc);
if (!fud)
- goto err;
+ goto err_free_dax;
}
fc->dev = sb->s_dev;
- fc->sb = sb;
+ fm->sb = sb;
err = fuse_bdi_init(fc, sb);
if (err)
goto err_dev_free;
@@ -1196,11 +1369,11 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
fc->allow_other = ctx->allow_other;
fc->user_id = ctx->user_id;
fc->group_id = ctx->group_id;
- fc->max_read = max_t(unsigned, 4096, ctx->max_read);
+ fc->legacy_opts_show = ctx->legacy_opts_show;
+ fc->max_read = max_t(unsigned int, 4096, ctx->max_read);
fc->destroy = ctx->destroy;
fc->no_control = ctx->no_control;
fc->no_force_umount = ctx->no_force_umount;
- fc->no_mount_options = ctx->no_mount_options;
err = -ENOMEM;
root = fuse_get_root_inode(sb, ctx->rootmode);
@@ -1233,6 +1406,9 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
err_dev_free:
if (fud)
fuse_dev_free(fud);
+ err_free_dax:
+ if (IS_ENABLED(CONFIG_FUSE_DAX))
+ fuse_dax_conn_free(fc);
err:
return err;
}
@@ -1244,6 +1420,7 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
struct file *file;
int err;
struct fuse_conn *fc;
+ struct fuse_mount *fm;
err = -EINVAL;
file = fget(ctx->fd);
@@ -1264,9 +1441,16 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
if (!fc)
goto err_fput;
- fuse_conn_init(fc, sb->s_user_ns, &fuse_dev_fiq_ops, NULL);
+ fm = kzalloc(sizeof(*fm), GFP_KERNEL);
+ if (!fm) {
+ kfree(fc);
+ goto err_fput;
+ }
+
+ fuse_conn_init(fc, fm, sb->s_user_ns, &fuse_dev_fiq_ops, NULL);
fc->release = fuse_free_conn;
- sb->s_fs_info = fc;
+
+ sb->s_fs_info = fm;
err = fuse_fill_super_common(sb, ctx);
if (err)
@@ -1277,11 +1461,11 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
* CPUs after this
*/
fput(file);
- fuse_send_init(get_fuse_conn_super(sb));
+ fuse_send_init(get_fuse_mount_super(sb));
return 0;
err_put_conn:
- fuse_conn_put(fc);
+ fuse_mount_put(fm);
sb->s_fs_info = NULL;
err_fput:
fput(file);
@@ -1325,6 +1509,7 @@ static int fuse_init_fs_context(struct fs_context *fc)
ctx->max_read = ~0;
ctx->blksize = FUSE_DEFAULT_BLKSIZE;
+ ctx->legacy_opts_show = true;
#ifdef CONFIG_BLOCK
if (fc->fs_type == &fuseblk_fs_type) {
@@ -1338,29 +1523,52 @@ static int fuse_init_fs_context(struct fs_context *fc)
return 0;
}
-static void fuse_sb_destroy(struct super_block *sb)
+bool fuse_mount_remove(struct fuse_mount *fm)
{
- struct fuse_conn *fc = get_fuse_conn_super(sb);
+ struct fuse_conn *fc = fm->fc;
+ bool last = false;
- if (fc) {
- if (fc->destroy)
- fuse_send_destroy(fc);
+ down_write(&fc->killsb);
+ list_del_init(&fm->fc_entry);
+ if (list_empty(&fc->mounts))
+ last = true;
+ up_write(&fc->killsb);
- fuse_abort_conn(fc);
- fuse_wait_aborted(fc);
+ return last;
+}
+EXPORT_SYMBOL_GPL(fuse_mount_remove);
- down_write(&fc->killsb);
- fc->sb = NULL;
- up_write(&fc->killsb);
+void fuse_conn_destroy(struct fuse_mount *fm)
+{
+ struct fuse_conn *fc = fm->fc;
+
+ if (fc->destroy)
+ fuse_send_destroy(fm);
+
+ fuse_abort_conn(fc);
+ fuse_wait_aborted(fc);
+
+ if (!list_empty(&fc->entry)) {
+ mutex_lock(&fuse_mutex);
+ list_del(&fc->entry);
+ fuse_ctl_remove_conn(fc);
+ mutex_unlock(&fuse_mutex);
}
}
+EXPORT_SYMBOL_GPL(fuse_conn_destroy);
-void fuse_kill_sb_anon(struct super_block *sb)
+static void fuse_kill_sb_anon(struct super_block *sb)
{
- fuse_sb_destroy(sb);
+ struct fuse_mount *fm = get_fuse_mount_super(sb);
+ bool last;
+
+ if (fm) {
+ last = fuse_mount_remove(fm);
+ if (last)
+ fuse_conn_destroy(fm);
+ }
kill_anon_super(sb);
}
-EXPORT_SYMBOL_GPL(fuse_kill_sb_anon);
static struct file_system_type fuse_fs_type = {
.owner = THIS_MODULE,
@@ -1375,7 +1583,14 @@ MODULE_ALIAS_FS("fuse");
#ifdef CONFIG_BLOCK
static void fuse_kill_sb_blk(struct super_block *sb)
{
- fuse_sb_destroy(sb);
+ struct fuse_mount *fm = get_fuse_mount_super(sb);
+ bool last;
+
+ if (fm) {
+ last = fuse_mount_remove(fm);
+ if (last)
+ fuse_conn_destroy(fm);
+ }
kill_block_super(sb);
}
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index 90e3f01bd796..3b5e91045871 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -252,7 +252,7 @@ retry:
static void fuse_force_forget(struct file *file, u64 nodeid)
{
struct inode *inode = file_inode(file);
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_forget_in inarg;
FUSE_ARGS(args);
@@ -266,7 +266,7 @@ static void fuse_force_forget(struct file *file, u64 nodeid)
args.force = true;
args.noreply = true;
- fuse_simple_request(fc, &args);
+ fuse_simple_request(fm, &args);
/* ignore errors */
}
@@ -320,7 +320,7 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
ssize_t res;
struct page *page;
struct inode *inode = file_inode(file);
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_io_args ia = {};
struct fuse_args_pages *ap = &ia.ap;
struct fuse_page_desc desc = { .length = PAGE_SIZE };
@@ -337,7 +337,7 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
ap->pages = &page;
ap->descs = &desc;
if (plus) {
- attr_version = fuse_get_attr_version(fc);
+ attr_version = fuse_get_attr_version(fm->fc);
fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
FUSE_READDIRPLUS);
} else {
@@ -345,7 +345,7 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
FUSE_READDIR);
}
locked = fuse_lock_inode(inode);
- res = fuse_simple_request(fc, &ap->args);
+ res = fuse_simple_request(fm, &ap->args);
fuse_unlock_inode(inode, locked);
if (res >= 0) {
if (!res) {
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 104f35de5270..21a9e534417c 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -5,12 +5,17 @@
*/
#include <linux/fs.h>
+#include <linux/dax.h>
+#include <linux/pci.h>
+#include <linux/pfn_t.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_fs.h>
#include <linux/delay.h>
#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include <linux/highmem.h>
+#include <linux/uio.h>
#include "fuse_i.h"
/* List of virtio-fs device instances and a lock for the list. Also provides
@@ -24,6 +29,8 @@ enum {
VQ_REQUEST
};
+#define VQ_NAME_LEN 24
+
/* Per-virtqueue state */
struct virtio_fs_vq {
spinlock_t lock;
@@ -36,7 +43,7 @@ struct virtio_fs_vq {
bool connected;
long in_flight;
struct completion in_flight_zero; /* No inflight requests */
- char name[24];
+ char name[VQ_NAME_LEN];
} ____cacheline_aligned_in_smp;
/* A virtio-fs device instance */
@@ -47,6 +54,12 @@ struct virtio_fs {
struct virtio_fs_vq *vqs;
unsigned int nvqs; /* number of virtqueues */
unsigned int num_request_queues; /* number of request queues */
+ struct dax_device *dax_dev;
+
+ /* DAX memory window where file contents are mapped */
+ void *window_kaddr;
+ phys_addr_t window_phys_addr;
+ size_t window_len;
};
struct virtio_fs_forget_req {
@@ -69,6 +82,44 @@ struct virtio_fs_req_work {
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
struct fuse_req *req, bool in_flight);
+enum {
+ OPT_DAX,
+};
+
+static const struct fs_parameter_spec virtio_fs_parameters[] = {
+ fsparam_flag("dax", OPT_DAX),
+ {}
+};
+
+static int virtio_fs_parse_param(struct fs_context *fc,
+ struct fs_parameter *param)
+{
+ struct fs_parse_result result;
+ struct fuse_fs_context *ctx = fc->fs_private;
+ int opt;
+
+ opt = fs_parse(fc, virtio_fs_parameters, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case OPT_DAX:
+ ctx->dax = 1;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void virtio_fs_free_fc(struct fs_context *fc)
+{
+ struct fuse_fs_context *ctx = fc->fs_private;
+
+ kfree(ctx);
+}
+
static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
{
struct virtio_fs *fs = vq->vdev->priv;
@@ -289,7 +340,6 @@ static void virtio_fs_request_dispatch_work(struct work_struct *work)
struct fuse_req *req;
struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
dispatch_work.work);
- struct fuse_conn *fc = fsvq->fud->fc;
int ret;
pr_debug("virtio-fs: worker %s called.\n", __func__);
@@ -304,7 +354,7 @@ static void virtio_fs_request_dispatch_work(struct work_struct *work)
list_del_init(&req->list);
spin_unlock(&fsvq->lock);
- fuse_request_end(fc, req);
+ fuse_request_end(req);
}
/* Dispatch pending requests */
@@ -335,7 +385,7 @@ static void virtio_fs_request_dispatch_work(struct work_struct *work)
spin_unlock(&fsvq->lock);
pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n",
ret);
- fuse_request_end(fc, req);
+ fuse_request_end(req);
}
}
}
@@ -495,7 +545,6 @@ static void virtio_fs_request_complete(struct fuse_req *req,
struct virtio_fs_vq *fsvq)
{
struct fuse_pqueue *fpq = &fsvq->fud->pq;
- struct fuse_conn *fc = fsvq->fud->fc;
struct fuse_args *args;
struct fuse_args_pages *ap;
unsigned int len, i, thislen;
@@ -528,7 +577,7 @@ static void virtio_fs_request_complete(struct fuse_req *req,
clear_bit(FR_SENT, &req->flags);
spin_unlock(&fpq->lock);
- fuse_request_end(fc, req);
+ fuse_request_end(req);
spin_lock(&fsvq->lock);
dec_in_flight_req(fsvq);
spin_unlock(&fsvq->lock);
@@ -596,6 +645,26 @@ static void virtio_fs_vq_done(struct virtqueue *vq)
schedule_work(&fsvq->done_work);
}
+static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name,
+ int vq_type)
+{
+ strncpy(fsvq->name, name, VQ_NAME_LEN);
+ spin_lock_init(&fsvq->lock);
+ INIT_LIST_HEAD(&fsvq->queued_reqs);
+ INIT_LIST_HEAD(&fsvq->end_reqs);
+ init_completion(&fsvq->in_flight_zero);
+
+ if (vq_type == VQ_REQUEST) {
+ INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work);
+ INIT_DELAYED_WORK(&fsvq->dispatch_work,
+ virtio_fs_request_dispatch_work);
+ } else {
+ INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work);
+ INIT_DELAYED_WORK(&fsvq->dispatch_work,
+ virtio_fs_hiprio_dispatch_work);
+ }
+}
+
/* Initialize virtqueues */
static int virtio_fs_setup_vqs(struct virtio_device *vdev,
struct virtio_fs *fs)
@@ -611,7 +680,7 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
if (fs->num_request_queues == 0)
return -EINVAL;
- fs->nvqs = 1 + fs->num_request_queues;
+ fs->nvqs = VQ_REQUEST + fs->num_request_queues;
fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
if (!fs->vqs)
return -ENOMEM;
@@ -625,29 +694,17 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
goto out;
}
+ /* Initialize the hiprio/forget request virtqueue */
callbacks[VQ_HIPRIO] = virtio_fs_vq_done;
- snprintf(fs->vqs[VQ_HIPRIO].name, sizeof(fs->vqs[VQ_HIPRIO].name),
- "hiprio");
+ virtio_fs_init_vq(&fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO);
names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;
- INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work);
- INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs);
- INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].end_reqs);
- INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work,
- virtio_fs_hiprio_dispatch_work);
- init_completion(&fs->vqs[VQ_HIPRIO].in_flight_zero);
- spin_lock_init(&fs->vqs[VQ_HIPRIO].lock);
/* Initialize the requests virtqueues */
for (i = VQ_REQUEST; i < fs->nvqs; i++) {
- spin_lock_init(&fs->vqs[i].lock);
- INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work);
- INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work,
- virtio_fs_request_dispatch_work);
- INIT_LIST_HEAD(&fs->vqs[i].queued_reqs);
- INIT_LIST_HEAD(&fs->vqs[i].end_reqs);
- init_completion(&fs->vqs[i].in_flight_zero);
- snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name),
- "requests.%u", i - VQ_REQUEST);
+ char vq_name[VQ_NAME_LEN];
+
+ snprintf(vq_name, VQ_NAME_LEN, "requests.%u", i - VQ_REQUEST);
+ virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST);
callbacks[i] = virtio_fs_vq_done;
names[i] = fs->vqs[i].name;
}
@@ -676,6 +733,130 @@ static void virtio_fs_cleanup_vqs(struct virtio_device *vdev,
vdev->config->del_vqs(vdev);
}
+/* Map a window offset to a page frame number. The window offset will have
+ * been produced by .iomap_begin(), which maps a file offset to a window
+ * offset.
+ */
+static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
+ long nr_pages, void **kaddr, pfn_t *pfn)
+{
+ struct virtio_fs *fs = dax_get_private(dax_dev);
+ phys_addr_t offset = PFN_PHYS(pgoff);
+ size_t max_nr_pages = fs->window_len/PAGE_SIZE - pgoff;
+
+ if (kaddr)
+ *kaddr = fs->window_kaddr + offset;
+ if (pfn)
+ *pfn = phys_to_pfn_t(fs->window_phys_addr + offset,
+ PFN_DEV | PFN_MAP);
+ return nr_pages > max_nr_pages ? max_nr_pages : nr_pages;
+}
+
+static size_t virtio_fs_copy_from_iter(struct dax_device *dax_dev,
+ pgoff_t pgoff, void *addr,
+ size_t bytes, struct iov_iter *i)
+{
+ return copy_from_iter(addr, bytes, i);
+}
+
+static size_t virtio_fs_copy_to_iter(struct dax_device *dax_dev,
+ pgoff_t pgoff, void *addr,
+ size_t bytes, struct iov_iter *i)
+{
+ return copy_to_iter(addr, bytes, i);
+}
+
+static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
+ pgoff_t pgoff, size_t nr_pages)
+{
+ long rc;
+ void *kaddr;
+
+ rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL);
+ if (rc < 0)
+ return rc;
+ memset(kaddr, 0, nr_pages << PAGE_SHIFT);
+ dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
+ return 0;
+}
+
+static const struct dax_operations virtio_fs_dax_ops = {
+ .direct_access = virtio_fs_direct_access,
+ .copy_from_iter = virtio_fs_copy_from_iter,
+ .copy_to_iter = virtio_fs_copy_to_iter,
+ .zero_page_range = virtio_fs_zero_page_range,
+};
+
+static void virtio_fs_cleanup_dax(void *data)
+{
+ struct dax_device *dax_dev = data;
+
+ kill_dax(dax_dev);
+ put_dax(dax_dev);
+}
+
+static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
+{
+ struct virtio_shm_region cache_reg;
+ struct dev_pagemap *pgmap;
+ bool have_cache;
+
+ if (!IS_ENABLED(CONFIG_FUSE_DAX))
+ return 0;
+
+ /* Get cache region */
+ have_cache = virtio_get_shm_region(vdev, &cache_reg,
+ (u8)VIRTIO_FS_SHMCAP_ID_CACHE);
+ if (!have_cache) {
+ dev_notice(&vdev->dev, "%s: No cache capability\n", __func__);
+ return 0;
+ }
+
+ if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len,
+ dev_name(&vdev->dev))) {
+ dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n",
+ cache_reg.addr, cache_reg.len);
+ return -EBUSY;
+ }
+
+ dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len,
+ cache_reg.addr);
+
+ pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL);
+ if (!pgmap)
+ return -ENOMEM;
+
+ pgmap->type = MEMORY_DEVICE_FS_DAX;
+
+ /* Ideally we would directly use the PCI BAR resource but
+ * devm_memremap_pages() wants its own copy in pgmap. So
+ * initialize a struct resource from scratch (only the start
+ * and end fields will be used).
+ */
+ pgmap->range = (struct range) {
+ .start = (phys_addr_t) cache_reg.addr,
+ .end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1,
+ };
+ pgmap->nr_range = 1;
+
+ fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap);
+ if (IS_ERR(fs->window_kaddr))
+ return PTR_ERR(fs->window_kaddr);
+
+ fs->window_phys_addr = (phys_addr_t) cache_reg.addr;
+ fs->window_len = (phys_addr_t) cache_reg.len;
+
+ dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n",
+ __func__, fs->window_kaddr, cache_reg.addr, cache_reg.len);
+
+ fs->dax_dev = alloc_dax(fs, NULL, &virtio_fs_dax_ops, 0);
+ if (IS_ERR(fs->dax_dev))
+ return PTR_ERR(fs->dax_dev);
+
+ return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax,
+ fs->dax_dev);
+}
+
static int virtio_fs_probe(struct virtio_device *vdev)
{
struct virtio_fs *fs;
@@ -697,6 +878,10 @@ static int virtio_fs_probe(struct virtio_device *vdev)
/* TODO vq affinity */
+ ret = virtio_fs_setup_dax(vdev, fs);
+ if (ret < 0)
+ goto out_vqs;
+
/* Bring the device online in case the filesystem is mounted and
* requests need to be sent before we return.
*/
@@ -833,18 +1018,37 @@ __releases(fiq->lock)
spin_unlock(&fiq->lock);
}
+/* Count number of scatter-gather elements required */
+static unsigned int sg_count_fuse_pages(struct fuse_page_desc *page_descs,
+ unsigned int num_pages,
+ unsigned int total_len)
+{
+ unsigned int i;
+ unsigned int this_len;
+
+ for (i = 0; i < num_pages && total_len; i++) {
+ this_len = min(page_descs[i].length, total_len);
+ total_len -= this_len;
+ }
+
+ return i;
+}
+
/* Return the number of scatter-gather list elements required */
static unsigned int sg_count_fuse_req(struct fuse_req *req)
{
struct fuse_args *args = req->args;
struct fuse_args_pages *ap = container_of(args, typeof(*ap), args);
- unsigned int total_sgs = 1 /* fuse_in_header */;
+ unsigned int size, total_sgs = 1 /* fuse_in_header */;
if (args->in_numargs - args->in_pages)
total_sgs += 1;
- if (args->in_pages)
- total_sgs += ap->num_pages;
+ if (args->in_pages) {
+ size = args->in_args[args->in_numargs - 1].size;
+ total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
+ size);
+ }
if (!test_bit(FR_ISREPLY, &req->flags))
return total_sgs;
@@ -854,8 +1058,11 @@ static unsigned int sg_count_fuse_req(struct fuse_req *req)
if (args->out_numargs - args->out_pages)
total_sgs += 1;
- if (args->out_pages)
- total_sgs += ap->num_pages;
+ if (args->out_pages) {
+ size = args->out_args[args->out_numargs - 1].size;
+ total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
+ size);
+ }
return total_sgs;
}
@@ -1071,24 +1278,28 @@ static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
.release = virtio_fs_fiq_release,
};
-static int virtio_fs_fill_super(struct super_block *sb)
+static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx)
{
- struct fuse_conn *fc = get_fuse_conn_super(sb);
+ ctx->rootmode = S_IFDIR;
+ ctx->default_permissions = 1;
+ ctx->allow_other = 1;
+ ctx->max_read = UINT_MAX;
+ ctx->blksize = 512;
+ ctx->destroy = true;
+ ctx->no_control = true;
+ ctx->no_force_umount = true;
+}
+
+static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc)
+{
+ struct fuse_mount *fm = get_fuse_mount_super(sb);
+ struct fuse_conn *fc = fm->fc;
struct virtio_fs *fs = fc->iq.priv;
+ struct fuse_fs_context *ctx = fsc->fs_private;
unsigned int i;
int err;
- struct fuse_fs_context ctx = {
- .rootmode = S_IFDIR,
- .default_permissions = 1,
- .allow_other = 1,
- .max_read = UINT_MAX,
- .blksize = 512,
- .destroy = true,
- .no_control = true,
- .no_force_umount = true,
- .no_mount_options = true,
- };
+ virtio_fs_ctx_set_defaults(ctx);
mutex_lock(&virtio_fs_mutex);
/* After holding mutex, make sure virtiofs device is still there.
@@ -1112,8 +1323,10 @@ static int virtio_fs_fill_super(struct super_block *sb)
}
/* virtiofs allocates and installs its own fuse devices */
- ctx.fudptr = NULL;
- err = fuse_fill_super_common(sb, &ctx);
+ ctx->fudptr = NULL;
+ if (ctx->dax)
+ ctx->dax_dev = fs->dax_dev;
+ err = fuse_fill_super_common(sb, ctx);
if (err < 0)
goto err_free_fuse_devs;
@@ -1125,7 +1338,7 @@ static int virtio_fs_fill_super(struct super_block *sb)
/* Previous unmount will stop all queues. Start these again */
virtio_fs_start_all_queues(fs);
- fuse_send_init(fc);
+ fuse_send_init(fm);
mutex_unlock(&virtio_fs_mutex);
return 0;
@@ -1136,18 +1349,17 @@ err:
return err;
}
-static void virtio_kill_sb(struct super_block *sb)
+static void virtio_fs_conn_destroy(struct fuse_mount *fm)
{
- struct fuse_conn *fc = get_fuse_conn_super(sb);
- struct virtio_fs *vfs;
- struct virtio_fs_vq *fsvq;
-
- /* If mount failed, we can still be called without any fc */
- if (!fc)
- return fuse_kill_sb_anon(sb);
+ struct fuse_conn *fc = fm->fc;
+ struct virtio_fs *vfs = fc->iq.priv;
+ struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_HIPRIO];
- vfs = fc->iq.priv;
- fsvq = &vfs->vqs[VQ_HIPRIO];
+ /* Stop dax worker. Soon evict_inodes() will be called which
+ * will free all memory ranges belonging to all inodes.
+ */
+ if (IS_ENABLED(CONFIG_FUSE_DAX))
+ fuse_dax_cancel_work(fc);
/* Stop forget queue. Soon destroy will be sent */
spin_lock(&fsvq->lock);
@@ -1155,9 +1367,9 @@ static void virtio_kill_sb(struct super_block *sb)
spin_unlock(&fsvq->lock);
virtio_fs_drain_all_queues(vfs);
- fuse_kill_sb_anon(sb);
+ fuse_conn_destroy(fm);
- /* fuse_kill_sb_anon() must have sent destroy. Stop all queues
+ /* fuse_conn_destroy() must have sent destroy. Stop all queues
* and drain one more time and free fuse devices. Freeing fuse
* devices will drop their reference on fuse_conn and that in
* turn will drop its reference on virtio_fs object.
@@ -1167,12 +1379,27 @@ static void virtio_kill_sb(struct super_block *sb)
virtio_fs_free_devs(vfs);
}
+static void virtio_kill_sb(struct super_block *sb)
+{
+ struct fuse_mount *fm = get_fuse_mount_super(sb);
+ bool last;
+
+ /* If mount failed, we can still be called without any fc */
+ if (fm) {
+ last = fuse_mount_remove(fm);
+ if (last)
+ virtio_fs_conn_destroy(fm);
+ }
+ kill_anon_super(sb);
+}
+
static int virtio_fs_test_super(struct super_block *sb,
struct fs_context *fsc)
{
- struct fuse_conn *fc = fsc->s_fs_info;
+ struct fuse_mount *fsc_fm = fsc->s_fs_info;
+ struct fuse_mount *sb_fm = get_fuse_mount_super(sb);
- return fc->iq.priv == get_fuse_conn_super(sb)->iq.priv;
+ return fsc_fm->fc->iq.priv == sb_fm->fc->iq.priv;
}
static int virtio_fs_set_super(struct super_block *sb,
@@ -1182,7 +1409,7 @@ static int virtio_fs_set_super(struct super_block *sb,
err = get_anon_bdev(&sb->s_dev);
if (!err)
- fuse_conn_get(fsc->s_fs_info);
+ fuse_mount_get(fsc->s_fs_info);
return err;
}
@@ -1192,6 +1419,7 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
struct virtio_fs *fs;
struct super_block *sb;
struct fuse_conn *fc;
+ struct fuse_mount *fm;
int err;
/* This gets a reference on virtio_fs object. This ptr gets installed
@@ -1212,19 +1440,29 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
return -ENOMEM;
}
- fuse_conn_init(fc, get_user_ns(current_user_ns()), &virtio_fs_fiq_ops,
- fs);
+ fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
+ if (!fm) {
+ mutex_lock(&virtio_fs_mutex);
+ virtio_fs_put(fs);
+ mutex_unlock(&virtio_fs_mutex);
+ kfree(fc);
+ return -ENOMEM;
+ }
+
+ fuse_conn_init(fc, fm, get_user_ns(current_user_ns()),
+ &virtio_fs_fiq_ops, fs);
fc->release = fuse_free_conn;
fc->delete_stale = true;
+ fc->auto_submounts = true;
- fsc->s_fs_info = fc;
+ fsc->s_fs_info = fm;
sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super);
- fuse_conn_put(fc);
+ fuse_mount_put(fm);
if (IS_ERR(sb))
return PTR_ERR(sb);
if (!sb->s_root) {
- err = virtio_fs_fill_super(sb);
+ err = virtio_fs_fill_super(sb, fsc);
if (err) {
deactivate_locked_super(sb);
return err;
@@ -1239,11 +1477,19 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
}
static const struct fs_context_operations virtio_fs_context_ops = {
+ .free = virtio_fs_free_fc,
+ .parse_param = virtio_fs_parse_param,
.get_tree = virtio_fs_get_tree,
};
static int virtio_fs_init_fs_context(struct fs_context *fsc)
{
+ struct fuse_fs_context *ctx;
+
+ ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+ fsc->fs_private = ctx;
fsc->ops = &virtio_fs_context_ops;
return 0;
}
diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c
index 20d052e08b3b..371bdcbc7233 100644
--- a/fs/fuse/xattr.c
+++ b/fs/fuse/xattr.c
@@ -14,12 +14,12 @@
int fuse_setxattr(struct inode *inode, const char *name, const void *value,
size_t size, int flags)
{
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
struct fuse_setxattr_in inarg;
int err;
- if (fc->no_setxattr)
+ if (fm->fc->no_setxattr)
return -EOPNOTSUPP;
memset(&inarg, 0, sizeof(inarg));
@@ -34,9 +34,9 @@ int fuse_setxattr(struct inode *inode, const char *name, const void *value,
args.in_args[1].value = name;
args.in_args[2].size = size;
args.in_args[2].value = value;
- err = fuse_simple_request(fc, &args);
+ err = fuse_simple_request(fm, &args);
if (err == -ENOSYS) {
- fc->no_setxattr = 1;
+ fm->fc->no_setxattr = 1;
err = -EOPNOTSUPP;
}
if (!err) {
@@ -49,13 +49,13 @@ int fuse_setxattr(struct inode *inode, const char *name, const void *value,
ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
size_t size)
{
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
struct fuse_getxattr_in inarg;
struct fuse_getxattr_out outarg;
ssize_t ret;
- if (fc->no_getxattr)
+ if (fm->fc->no_getxattr)
return -EOPNOTSUPP;
memset(&inarg, 0, sizeof(inarg));
@@ -77,11 +77,11 @@ ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg;
}
- ret = fuse_simple_request(fc, &args);
+ ret = fuse_simple_request(fm, &args);
if (!ret && !size)
ret = min_t(ssize_t, outarg.size, XATTR_SIZE_MAX);
if (ret == -ENOSYS) {
- fc->no_getxattr = 1;
+ fm->fc->no_getxattr = 1;
ret = -EOPNOTSUPP;
}
return ret;
@@ -107,16 +107,16 @@ static int fuse_verify_xattr_list(char *list, size_t size)
ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
{
struct inode *inode = d_inode(entry);
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
struct fuse_getxattr_in inarg;
struct fuse_getxattr_out outarg;
ssize_t ret;
- if (!fuse_allow_current_process(fc))
+ if (!fuse_allow_current_process(fm->fc))
return -EACCES;
- if (fc->no_listxattr)
+ if (fm->fc->no_listxattr)
return -EOPNOTSUPP;
memset(&inarg, 0, sizeof(inarg));
@@ -136,13 +136,13 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg;
}
- ret = fuse_simple_request(fc, &args);
+ ret = fuse_simple_request(fm, &args);
if (!ret && !size)
ret = min_t(ssize_t, outarg.size, XATTR_LIST_MAX);
if (ret > 0 && size)
ret = fuse_verify_xattr_list(list, ret);
if (ret == -ENOSYS) {
- fc->no_listxattr = 1;
+ fm->fc->no_listxattr = 1;
ret = -EOPNOTSUPP;
}
return ret;
@@ -150,11 +150,11 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
int fuse_removexattr(struct inode *inode, const char *name)
{
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
int err;
- if (fc->no_removexattr)
+ if (fm->fc->no_removexattr)
return -EOPNOTSUPP;
args.opcode = FUSE_REMOVEXATTR;
@@ -162,9 +162,9 @@ int fuse_removexattr(struct inode *inode, const char *name)
args.in_numargs = 1;
args.in_args[0].size = strlen(name) + 1;
args.in_args[0].value = name;
- err = fuse_simple_request(fc, &args);
+ err = fuse_simple_request(fm, &args);
if (err == -ENOSYS) {
- fc->no_removexattr = 1;
+ fm->fc->no_removexattr = 1;
err = -EOPNOTSUPP;
}
if (!err) {
diff --git a/fs/inode.c b/fs/inode.c
index c27989fbfb51..701f750a71cd 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -181,6 +181,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
mapping->a_ops = &empty_aops;
mapping->host = inode;
mapping->flags = 0;
+ if (sb->s_type->fs_flags & FS_THP_SUPPORT)
+ __set_bit(AS_THP_SUPPORT, &mapping->flags);
mapping->wb_err = 0;
atomic_set(&mapping->i_mmap_writable, 0);
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 0a182f1333e8..7cb3b4cb9b11 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -18,6 +18,7 @@
#include <linux/fs_struct.h>
#include <linux/task_work.h>
#include <linux/blk-cgroup.h>
+#include <linux/audit.h>
#include "io-wq.h"
@@ -429,14 +430,10 @@ static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work)
mmput(worker->mm);
worker->mm = NULL;
}
- if (!work->mm)
- return;
- if (mmget_not_zero(work->mm)) {
- kthread_use_mm(work->mm);
- worker->mm = work->mm;
- /* hang on to this mm */
- work->mm = NULL;
+ if (mmget_not_zero(work->identity->mm)) {
+ kthread_use_mm(work->identity->mm);
+ worker->mm = work->identity->mm;
return;
}
@@ -448,9 +445,11 @@ static inline void io_wq_switch_blkcg(struct io_worker *worker,
struct io_wq_work *work)
{
#ifdef CONFIG_BLK_CGROUP
- if (work->blkcg_css != worker->blkcg_css) {
- kthread_associate_blkcg(work->blkcg_css);
- worker->blkcg_css = work->blkcg_css;
+ if (!(work->flags & IO_WQ_WORK_BLKCG))
+ return;
+ if (work->identity->blkcg_css != worker->blkcg_css) {
+ kthread_associate_blkcg(work->identity->blkcg_css);
+ worker->blkcg_css = work->identity->blkcg_css;
}
#endif
}
@@ -458,9 +457,9 @@ static inline void io_wq_switch_blkcg(struct io_worker *worker,
static void io_wq_switch_creds(struct io_worker *worker,
struct io_wq_work *work)
{
- const struct cred *old_creds = override_creds(work->creds);
+ const struct cred *old_creds = override_creds(work->identity->creds);
- worker->cur_creds = work->creds;
+ worker->cur_creds = work->identity->creds;
if (worker->saved_creds)
put_cred(old_creds); /* creds set by previous switch */
else
@@ -470,20 +469,26 @@ static void io_wq_switch_creds(struct io_worker *worker,
static void io_impersonate_work(struct io_worker *worker,
struct io_wq_work *work)
{
- if (work->files && current->files != work->files) {
+ if ((work->flags & IO_WQ_WORK_FILES) &&
+ current->files != work->identity->files) {
task_lock(current);
- current->files = work->files;
- current->nsproxy = work->nsproxy;
+ current->files = work->identity->files;
+ current->nsproxy = work->identity->nsproxy;
task_unlock(current);
}
- if (work->fs && current->fs != work->fs)
- current->fs = work->fs;
- if (work->mm != worker->mm)
+ if ((work->flags & IO_WQ_WORK_FS) && current->fs != work->identity->fs)
+ current->fs = work->identity->fs;
+ if ((work->flags & IO_WQ_WORK_MM) && work->identity->mm != worker->mm)
io_wq_switch_mm(worker, work);
- if (worker->cur_creds != work->creds)
+ if ((work->flags & IO_WQ_WORK_CREDS) &&
+ worker->cur_creds != work->identity->creds)
io_wq_switch_creds(worker, work);
- current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->fsize;
+ current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->identity->fsize;
io_wq_switch_blkcg(worker, work);
+#ifdef CONFIG_AUDIT
+ current->loginuid = work->identity->loginuid;
+ current->sessionid = work->identity->sessionid;
+#endif
}
static void io_assign_current_work(struct io_worker *worker,
@@ -496,6 +501,11 @@ static void io_assign_current_work(struct io_worker *worker,
cond_resched();
}
+#ifdef CONFIG_AUDIT
+ current->loginuid = KUIDT_INIT(AUDIT_UID_UNSET);
+ current->sessionid = AUDIT_SID_UNSET;
+#endif
+
spin_lock_irq(&worker->lock);
worker->cur_work = work;
spin_unlock_irq(&worker->lock);
@@ -676,6 +686,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
kfree(worker);
return false;
}
+ kthread_bind_mask(worker->task, cpumask_of_node(wqe->node));
raw_spin_lock_irq(&wqe->lock);
hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 84bcf6a85523..be21c500c925 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -1,6 +1,8 @@
#ifndef INTERNAL_IO_WQ_H
#define INTERNAL_IO_WQ_H
+#include <linux/io_uring.h>
+
struct io_wq;
enum {
@@ -10,6 +12,12 @@ enum {
IO_WQ_WORK_NO_CANCEL = 8,
IO_WQ_WORK_CONCURRENT = 16,
+ IO_WQ_WORK_FILES = 32,
+ IO_WQ_WORK_FS = 64,
+ IO_WQ_WORK_MM = 128,
+ IO_WQ_WORK_CREDS = 256,
+ IO_WQ_WORK_BLKCG = 512,
+
IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */
};
@@ -85,15 +93,7 @@ static inline void wq_list_del(struct io_wq_work_list *list,
struct io_wq_work {
struct io_wq_work_node list;
- struct files_struct *files;
- struct mm_struct *mm;
-#ifdef CONFIG_BLK_CGROUP
- struct cgroup_subsys_state *blkcg_css;
-#endif
- const struct cred *creds;
- struct nsproxy *nsproxy;
- struct fs_struct *fs;
- unsigned long fsize;
+ struct io_identity *identity;
unsigned flags;
};
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 2e1dc354cd08..02dc81622081 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -81,6 +81,7 @@
#include <linux/pagemap.h>
#include <linux/io_uring.h>
#include <linux/blk-cgroup.h>
+#include <linux/audit.h>
#define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h>
@@ -327,6 +328,11 @@ struct io_ring_ctx {
const struct cred *creds;
+#ifdef CONFIG_AUDIT
+ kuid_t loginuid;
+ unsigned int sessionid;
+#endif
+
struct completion ref_comp;
struct completion sq_thread_comp;
@@ -574,7 +580,6 @@ enum {
REQ_F_NOWAIT_BIT,
REQ_F_LINK_TIMEOUT_BIT,
REQ_F_ISREG_BIT,
- REQ_F_COMP_LOCKED_BIT,
REQ_F_NEED_CLEANUP_BIT,
REQ_F_POLLED_BIT,
REQ_F_BUFFER_SELECTED_BIT,
@@ -613,8 +618,6 @@ enum {
REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT),
/* regular file */
REQ_F_ISREG = BIT(REQ_F_ISREG_BIT),
- /* completion under lock */
- REQ_F_COMP_LOCKED = BIT(REQ_F_COMP_LOCKED_BIT),
/* needs cleanup */
REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT),
/* already went through poll handler */
@@ -732,8 +735,6 @@ struct io_submit_state {
};
struct io_op_def {
- /* needs current->mm setup, does mm access */
- unsigned needs_mm : 1;
/* needs req->file assigned */
unsigned needs_file : 1;
/* don't fail if file grab fails */
@@ -744,10 +745,6 @@ struct io_op_def {
unsigned unbound_nonreg_file : 1;
/* opcode is not supported by this kernel */
unsigned not_supported : 1;
- /* needs file table */
- unsigned file_table : 1;
- /* needs ->fs */
- unsigned needs_fs : 1;
/* set if opcode supports polled "wait" */
unsigned pollin : 1;
unsigned pollout : 1;
@@ -757,45 +754,42 @@ struct io_op_def {
unsigned needs_fsize : 1;
/* must always have async data allocated */
unsigned needs_async_data : 1;
- /* needs blkcg context, issues async io potentially */
- unsigned needs_blkcg : 1;
/* size of async data needed, if any */
unsigned short async_size;
+ unsigned work_flags;
};
-static const struct io_op_def io_op_defs[] __read_mostly = {
+static const struct io_op_def io_op_defs[] = {
[IORING_OP_NOP] = {},
[IORING_OP_READV] = {
- .needs_mm = 1,
.needs_file = 1,
.unbound_nonreg_file = 1,
.pollin = 1,
.buffer_select = 1,
.needs_async_data = 1,
- .needs_blkcg = 1,
.async_size = sizeof(struct io_async_rw),
+ .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
},
[IORING_OP_WRITEV] = {
- .needs_mm = 1,
.needs_file = 1,
.hash_reg_file = 1,
.unbound_nonreg_file = 1,
.pollout = 1,
.needs_fsize = 1,
.needs_async_data = 1,
- .needs_blkcg = 1,
.async_size = sizeof(struct io_async_rw),
+ .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
},
[IORING_OP_FSYNC] = {
.needs_file = 1,
- .needs_blkcg = 1,
+ .work_flags = IO_WQ_WORK_BLKCG,
},
[IORING_OP_READ_FIXED] = {
.needs_file = 1,
.unbound_nonreg_file = 1,
.pollin = 1,
- .needs_blkcg = 1,
.async_size = sizeof(struct io_async_rw),
+ .work_flags = IO_WQ_WORK_BLKCG,
},
[IORING_OP_WRITE_FIXED] = {
.needs_file = 1,
@@ -803,8 +797,8 @@ static const struct io_op_def io_op_defs[] __read_mostly = {
.unbound_nonreg_file = 1,
.pollout = 1,
.needs_fsize = 1,
- .needs_blkcg = 1,
.async_size = sizeof(struct io_async_rw),
+ .work_flags = IO_WQ_WORK_BLKCG,
},
[IORING_OP_POLL_ADD] = {
.needs_file = 1,
@@ -813,137 +807,123 @@ static const struct io_op_def io_op_defs[] __read_mostly = {
[IORING_OP_POLL_REMOVE] = {},
[IORING_OP_SYNC_FILE_RANGE] = {
.needs_file = 1,
- .needs_blkcg = 1,
+ .work_flags = IO_WQ_WORK_BLKCG,
},
[IORING_OP_SENDMSG] = {
- .needs_mm = 1,
.needs_file = 1,
.unbound_nonreg_file = 1,
- .needs_fs = 1,
.pollout = 1,
.needs_async_data = 1,
- .needs_blkcg = 1,
.async_size = sizeof(struct io_async_msghdr),
+ .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
+ IO_WQ_WORK_FS,
},
[IORING_OP_RECVMSG] = {
- .needs_mm = 1,
.needs_file = 1,
.unbound_nonreg_file = 1,
- .needs_fs = 1,
.pollin = 1,
.buffer_select = 1,
.needs_async_data = 1,
- .needs_blkcg = 1,
.async_size = sizeof(struct io_async_msghdr),
+ .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
+ IO_WQ_WORK_FS,
},
[IORING_OP_TIMEOUT] = {
- .needs_mm = 1,
.needs_async_data = 1,
.async_size = sizeof(struct io_timeout_data),
+ .work_flags = IO_WQ_WORK_MM,
},
[IORING_OP_TIMEOUT_REMOVE] = {},
[IORING_OP_ACCEPT] = {
- .needs_mm = 1,
.needs_file = 1,
.unbound_nonreg_file = 1,
- .file_table = 1,
.pollin = 1,
+ .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_FILES,
},
[IORING_OP_ASYNC_CANCEL] = {},
[IORING_OP_LINK_TIMEOUT] = {
- .needs_mm = 1,
.needs_async_data = 1,
.async_size = sizeof(struct io_timeout_data),
+ .work_flags = IO_WQ_WORK_MM,
},
[IORING_OP_CONNECT] = {
- .needs_mm = 1,
.needs_file = 1,
.unbound_nonreg_file = 1,
.pollout = 1,
.needs_async_data = 1,
.async_size = sizeof(struct io_async_connect),
+ .work_flags = IO_WQ_WORK_MM,
},
[IORING_OP_FALLOCATE] = {
.needs_file = 1,
.needs_fsize = 1,
- .needs_blkcg = 1,
+ .work_flags = IO_WQ_WORK_BLKCG,
},
[IORING_OP_OPENAT] = {
- .file_table = 1,
- .needs_fs = 1,
- .needs_blkcg = 1,
+ .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG |
+ IO_WQ_WORK_FS,
},
[IORING_OP_CLOSE] = {
.needs_file = 1,
.needs_file_no_error = 1,
- .file_table = 1,
- .needs_blkcg = 1,
+ .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG,
},
[IORING_OP_FILES_UPDATE] = {
- .needs_mm = 1,
- .file_table = 1,
+ .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_MM,
},
[IORING_OP_STATX] = {
- .needs_mm = 1,
- .needs_fs = 1,
- .file_table = 1,
- .needs_blkcg = 1,
+ .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_MM |
+ IO_WQ_WORK_FS | IO_WQ_WORK_BLKCG,
},
[IORING_OP_READ] = {
- .needs_mm = 1,
.needs_file = 1,
.unbound_nonreg_file = 1,
.pollin = 1,
.buffer_select = 1,
- .needs_blkcg = 1,
.async_size = sizeof(struct io_async_rw),
+ .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
},
[IORING_OP_WRITE] = {
- .needs_mm = 1,
.needs_file = 1,
.unbound_nonreg_file = 1,
.pollout = 1,
.needs_fsize = 1,
- .needs_blkcg = 1,
.async_size = sizeof(struct io_async_rw),
+ .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
},
[IORING_OP_FADVISE] = {
.needs_file = 1,
- .needs_blkcg = 1,
+ .work_flags = IO_WQ_WORK_BLKCG,
},
[IORING_OP_MADVISE] = {
- .needs_mm = 1,
- .needs_blkcg = 1,
+ .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
},
[IORING_OP_SEND] = {
- .needs_mm = 1,
.needs_file = 1,
.unbound_nonreg_file = 1,
.pollout = 1,
- .needs_blkcg = 1,
+ .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
},
[IORING_OP_RECV] = {
- .needs_mm = 1,
.needs_file = 1,
.unbound_nonreg_file = 1,
.pollin = 1,
.buffer_select = 1,
- .needs_blkcg = 1,
+ .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
},
[IORING_OP_OPENAT2] = {
- .file_table = 1,
- .needs_fs = 1,
- .needs_blkcg = 1,
+ .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_FS |
+ IO_WQ_WORK_BLKCG,
},
[IORING_OP_EPOLL_CTL] = {
.unbound_nonreg_file = 1,
- .file_table = 1,
+ .work_flags = IO_WQ_WORK_FILES,
},
[IORING_OP_SPLICE] = {
.needs_file = 1,
.hash_reg_file = 1,
.unbound_nonreg_file = 1,
- .needs_blkcg = 1,
+ .work_flags = IO_WQ_WORK_BLKCG,
},
[IORING_OP_PROVIDE_BUFFERS] = {},
[IORING_OP_REMOVE_BUFFERS] = {},
@@ -963,8 +943,8 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
struct io_comp_state *cs);
static void io_cqring_fill_event(struct io_kiocb *req, long res);
static void io_put_req(struct io_kiocb *req);
+static void io_put_req_deferred(struct io_kiocb *req, int nr);
static void io_double_put_req(struct io_kiocb *req);
-static void __io_double_put_req(struct io_kiocb *req);
static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req);
static void __io_queue_linked_timeout(struct io_kiocb *req);
static void io_queue_linked_timeout(struct io_kiocb *req);
@@ -986,7 +966,7 @@ static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
static struct kmem_cache *req_cachep;
-static const struct file_operations io_uring_fops __read_mostly;
+static const struct file_operations io_uring_fops;
struct sock *io_uring_get_socket(struct file *file)
{
@@ -1034,7 +1014,7 @@ static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx)
static int io_sq_thread_acquire_mm(struct io_ring_ctx *ctx,
struct io_kiocb *req)
{
- if (!io_op_defs[req->opcode].needs_mm)
+ if (!(io_op_defs[req->opcode].work_flags & IO_WQ_WORK_MM))
return 0;
return __io_sq_thread_acquire_mm(ctx);
}
@@ -1066,16 +1046,48 @@ static inline void req_set_fail_links(struct io_kiocb *req)
}
/*
+ * None of these are dereferenced, they are simply used to check if any of
+ * them have changed. If we're under current and check they are still the
+ * same, we're fine to grab references to them for actual out-of-line use.
+ */
+static void io_init_identity(struct io_identity *id)
+{
+ id->files = current->files;
+ id->mm = current->mm;
+#ifdef CONFIG_BLK_CGROUP
+ rcu_read_lock();
+ id->blkcg_css = blkcg_css();
+ rcu_read_unlock();
+#endif
+ id->creds = current_cred();
+ id->nsproxy = current->nsproxy;
+ id->fs = current->fs;
+ id->fsize = rlimit(RLIMIT_FSIZE);
+#ifdef CONFIG_AUDIT
+ id->loginuid = current->loginuid;
+ id->sessionid = current->sessionid;
+#endif
+ refcount_set(&id->count, 1);
+}
+
+/*
* Note: must call io_req_init_async() for the first time you
* touch any members of io_wq_work.
*/
static inline void io_req_init_async(struct io_kiocb *req)
{
+ struct io_uring_task *tctx = current->io_uring;
+
if (req->flags & REQ_F_WORK_INITIALIZED)
return;
memset(&req->work, 0, sizeof(req->work));
req->flags |= REQ_F_WORK_INITIALIZED;
+
+ /* Grab a ref if this isn't our static identity */
+ req->work.identity = tctx->identity;
+ if (tctx->identity != &tctx->__identity)
+ refcount_inc(&req->work.identity->count);
}
static inline bool io_async_submit(struct io_ring_ctx *ctx)
@@ -1181,105 +1193,195 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx)
}
}
-/*
- * Returns true if we need to defer file table putting. This can only happen
- * from the error path with REQ_F_COMP_LOCKED set.
- */
-static bool io_req_clean_work(struct io_kiocb *req)
+static void io_put_identity(struct io_uring_task *tctx, struct io_kiocb *req)
+{
+ if (req->work.identity == &tctx->__identity)
+ return;
+ if (refcount_dec_and_test(&req->work.identity->count))
+ kfree(req->work.identity);
+}
+
+static void io_req_clean_work(struct io_kiocb *req)
{
if (!(req->flags & REQ_F_WORK_INITIALIZED))
- return false;
+ return;
req->flags &= ~REQ_F_WORK_INITIALIZED;
- if (req->work.mm) {
- mmdrop(req->work.mm);
- req->work.mm = NULL;
+ if (req->work.flags & IO_WQ_WORK_MM) {
+ mmdrop(req->work.identity->mm);
+ req->work.flags &= ~IO_WQ_WORK_MM;
}
#ifdef CONFIG_BLK_CGROUP
- if (req->work.blkcg_css)
- css_put(req->work.blkcg_css);
+ if (req->work.flags & IO_WQ_WORK_BLKCG) {
+ css_put(req->work.identity->blkcg_css);
+ req->work.flags &= ~IO_WQ_WORK_BLKCG;
+ }
#endif
- if (req->work.creds) {
- put_cred(req->work.creds);
- req->work.creds = NULL;
+ if (req->work.flags & IO_WQ_WORK_CREDS) {
+ put_cred(req->work.identity->creds);
+ req->work.flags &= ~IO_WQ_WORK_CREDS;
}
- if (req->work.fs) {
- struct fs_struct *fs = req->work.fs;
-
- if (req->flags & REQ_F_COMP_LOCKED)
- return true;
+ if (req->work.flags & IO_WQ_WORK_FS) {
+ struct fs_struct *fs = req->work.identity->fs;
- spin_lock(&req->work.fs->lock);
+ spin_lock(&req->work.identity->fs->lock);
if (--fs->users)
fs = NULL;
- spin_unlock(&req->work.fs->lock);
+ spin_unlock(&req->work.identity->fs->lock);
if (fs)
free_fs_struct(fs);
- req->work.fs = NULL;
+ req->work.flags &= ~IO_WQ_WORK_FS;
}
- return false;
+ io_put_identity(req->task->io_uring, req);
}
-static void io_prep_async_work(struct io_kiocb *req)
+/*
+ * Create a private copy of io_identity, since some fields don't match
+ * the current context.
+ */
+static bool io_identity_cow(struct io_kiocb *req)
+{
+ struct io_uring_task *tctx = current->io_uring;
+ const struct cred *creds = NULL;
+ struct io_identity *id;
+
+ if (req->work.flags & IO_WQ_WORK_CREDS)
+ creds = req->work.identity->creds;
+
+ id = kmemdup(req->work.identity, sizeof(*id), GFP_KERNEL);
+ if (unlikely(!id)) {
+ req->work.flags |= IO_WQ_WORK_CANCEL;
+ return false;
+ }
+
+ /*
+ * We can safely just re-init the creds we copied Either the field
+ * matches the current one, or we haven't grabbed it yet. The only
+ * exception is ->creds, through registered personalities, so handle
+ * that one separately.
+ */
+ io_init_identity(id);
+ if (creds)
+ req->work.identity->creds = creds;
+
+ /* add one for this request */
+ refcount_inc(&id->count);
+
+ /* drop old identity, assign new one. one ref for req, one for tctx */
+ if (req->work.identity != tctx->identity &&
+ refcount_sub_and_test(2, &req->work.identity->count))
+ kfree(req->work.identity);
+
+ req->work.identity = id;
+ tctx->identity = id;
+ return true;
+}
+
+static bool io_grab_identity(struct io_kiocb *req)
{
const struct io_op_def *def = &io_op_defs[req->opcode];
+ struct io_identity *id = req->work.identity;
struct io_ring_ctx *ctx = req->ctx;
- io_req_init_async(req);
+ if (def->needs_fsize && id->fsize != rlimit(RLIMIT_FSIZE))
+ return false;
- if (req->flags & REQ_F_ISREG) {
- if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL))
- io_wq_hash_work(&req->work, file_inode(req->file));
- } else {
- if (def->unbound_nonreg_file)
- req->work.flags |= IO_WQ_WORK_UNBOUND;
- }
- if (!req->work.files && io_op_defs[req->opcode].file_table &&
+ if (!(req->work.flags & IO_WQ_WORK_FILES) &&
+ (def->work_flags & IO_WQ_WORK_FILES) &&
!(req->flags & REQ_F_NO_FILE_TABLE)) {
- req->work.files = get_files_struct(current);
- get_nsproxy(current->nsproxy);
- req->work.nsproxy = current->nsproxy;
+ if (id->files != current->files ||
+ id->nsproxy != current->nsproxy)
+ return false;
+ atomic_inc(&id->files->count);
+ get_nsproxy(id->nsproxy);
req->flags |= REQ_F_INFLIGHT;
spin_lock_irq(&ctx->inflight_lock);
list_add(&req->inflight_entry, &ctx->inflight_list);
spin_unlock_irq(&ctx->inflight_lock);
- }
- if (!req->work.mm && def->needs_mm) {
- mmgrab(current->mm);
- req->work.mm = current->mm;
+ req->work.flags |= IO_WQ_WORK_FILES;
}
#ifdef CONFIG_BLK_CGROUP
- if (!req->work.blkcg_css && def->needs_blkcg) {
+ if (!(req->work.flags & IO_WQ_WORK_BLKCG) &&
+ (def->work_flags & IO_WQ_WORK_BLKCG)) {
rcu_read_lock();
- req->work.blkcg_css = blkcg_css();
+ if (id->blkcg_css != blkcg_css()) {
+ rcu_read_unlock();
+ return false;
+ }
/*
* This should be rare, either the cgroup is dying or the task
* is moving cgroups. Just punt to root for the handful of ios.
*/
- if (!css_tryget_online(req->work.blkcg_css))
- req->work.blkcg_css = NULL;
+ if (css_tryget_online(id->blkcg_css))
+ req->work.flags |= IO_WQ_WORK_BLKCG;
rcu_read_unlock();
}
#endif
- if (!req->work.creds)
- req->work.creds = get_current_cred();
- if (!req->work.fs && def->needs_fs) {
- spin_lock(&current->fs->lock);
- if (!current->fs->in_exec) {
- req->work.fs = current->fs;
- req->work.fs->users++;
+ if (!(req->work.flags & IO_WQ_WORK_CREDS)) {
+ if (id->creds != current_cred())
+ return false;
+ get_cred(id->creds);
+ req->work.flags |= IO_WQ_WORK_CREDS;
+ }
+#ifdef CONFIG_AUDIT
+ if (!uid_eq(current->loginuid, id->loginuid) ||
+ current->sessionid != id->sessionid)
+ return false;
+#endif
+ if (!(req->work.flags & IO_WQ_WORK_FS) &&
+ (def->work_flags & IO_WQ_WORK_FS)) {
+ if (current->fs != id->fs)
+ return false;
+ spin_lock(&id->fs->lock);
+ if (!id->fs->in_exec) {
+ id->fs->users++;
+ req->work.flags |= IO_WQ_WORK_FS;
} else {
req->work.flags |= IO_WQ_WORK_CANCEL;
}
spin_unlock(&current->fs->lock);
}
- if (def->needs_fsize)
- req->work.fsize = rlimit(RLIMIT_FSIZE);
- else
- req->work.fsize = RLIM_INFINITY;
+
+ return true;
+}
+
+static void io_prep_async_work(struct io_kiocb *req)
+{
+ const struct io_op_def *def = &io_op_defs[req->opcode];
+ struct io_ring_ctx *ctx = req->ctx;
+ struct io_identity *id;
+
+ io_req_init_async(req);
+ id = req->work.identity;
+
+ if (req->flags & REQ_F_ISREG) {
+ if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL))
+ io_wq_hash_work(&req->work, file_inode(req->file));
+ } else {
+ if (def->unbound_nonreg_file)
+ req->work.flags |= IO_WQ_WORK_UNBOUND;
+ }
+
+ /* ->mm can never change on us */
+ if (!(req->work.flags & IO_WQ_WORK_MM) &&
+ (def->work_flags & IO_WQ_WORK_MM)) {
+ mmgrab(id->mm);
+ req->work.flags |= IO_WQ_WORK_MM;
+ }
+
+ /* if we fail grabbing identity, we must COW, regrab, and retry */
+ if (io_grab_identity(req))
+ return;
+
+ if (!io_identity_cow(req))
+ return;
+
+ /* can't fail at this point */
+ if (!io_grab_identity(req))
+ WARN_ON(1);
}
static void io_prep_async_link(struct io_kiocb *req)
@@ -1325,9 +1427,8 @@ static void io_kill_timeout(struct io_kiocb *req)
atomic_set(&req->ctx->cq_timeouts,
atomic_read(&req->ctx->cq_timeouts) + 1);
list_del_init(&req->timeout.list);
- req->flags |= REQ_F_COMP_LOCKED;
io_cqring_fill_event(req, 0);
- io_put_req(req);
+ io_put_req_deferred(req, 1);
}
}
@@ -1378,8 +1479,7 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx)
if (link) {
__io_queue_linked_timeout(link);
/* drop submission reference */
- link->flags |= REQ_F_COMP_LOCKED;
- io_put_req(link);
+ io_put_req_deferred(link, 1);
}
kfree(de);
} while (!list_empty(&ctx->defer_list));
@@ -1471,8 +1571,9 @@ static inline bool io_match_files(struct io_kiocb *req,
{
if (!files)
return true;
- if (req->flags & REQ_F_WORK_INITIALIZED)
- return req->work.files == files;
+ if ((req->flags & REQ_F_WORK_INITIALIZED) &&
+ (req->work.flags & IO_WQ_WORK_FILES))
+ return req->work.identity->files == files;
return false;
}
@@ -1606,13 +1707,19 @@ static void io_submit_flush_completions(struct io_comp_state *cs)
req = list_first_entry(&cs->list, struct io_kiocb, compl.list);
list_del(&req->compl.list);
__io_cqring_fill_event(req, req->result, req->compl.cflags);
- if (!(req->flags & REQ_F_LINK_HEAD)) {
- req->flags |= REQ_F_COMP_LOCKED;
- io_put_req(req);
- } else {
+
+ /*
+ * io_free_req() doesn't care about completion_lock unless one
+ * of these flags is set. REQ_F_WORK_INITIALIZED is in the list
+ * because of a potential deadlock with req->work.fs->lock
+ */
+ if (req->flags & (REQ_F_FAIL_LINK|REQ_F_LINK_TIMEOUT
+ |REQ_F_WORK_INITIALIZED)) {
spin_unlock_irq(&ctx->completion_lock);
io_put_req(req);
spin_lock_irq(&ctx->completion_lock);
+ } else {
+ io_put_req(req);
}
}
io_commit_cqring(ctx);
@@ -1699,7 +1806,7 @@ static inline void io_put_file(struct io_kiocb *req, struct file *file,
fput(file);
}
-static bool io_dismantle_req(struct io_kiocb *req)
+static void io_dismantle_req(struct io_kiocb *req)
{
io_clean_op(req);
@@ -1708,15 +1815,17 @@ static bool io_dismantle_req(struct io_kiocb *req)
if (req->file)
io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE));
- return io_req_clean_work(req);
+ io_req_clean_work(req);
}
-static void __io_free_req_finish(struct io_kiocb *req)
+static void __io_free_req(struct io_kiocb *req)
{
struct io_uring_task *tctx = req->task->io_uring;
struct io_ring_ctx *ctx = req->ctx;
- atomic_long_inc(&tctx->req_complete);
+ io_dismantle_req(req);
+
+ percpu_counter_dec(&tctx->inflight);
if (tctx->in_idle)
wake_up(&tctx->wait);
put_task_struct(req->task);
@@ -1728,39 +1837,6 @@ static void __io_free_req_finish(struct io_kiocb *req)
percpu_ref_put(&ctx->refs);
}
-static void io_req_task_file_table_put(struct callback_head *cb)
-{
- struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
- struct fs_struct *fs = req->work.fs;
-
- spin_lock(&req->work.fs->lock);
- if (--fs->users)
- fs = NULL;
- spin_unlock(&req->work.fs->lock);
- if (fs)
- free_fs_struct(fs);
- req->work.fs = NULL;
- __io_free_req_finish(req);
-}
-
-static void __io_free_req(struct io_kiocb *req)
-{
- if (!io_dismantle_req(req)) {
- __io_free_req_finish(req);
- } else {
- int ret;
-
- init_task_work(&req->task_work, io_req_task_file_table_put);
- ret = task_work_add(req->task, &req->task_work, TWA_RESUME);
- if (unlikely(ret)) {
- struct task_struct *tsk;
-
- tsk = io_wq_get_task(req->ctx->io_wq);
- task_work_add(tsk, &req->task_work, 0);
- }
- }
-}
-
static bool io_link_cancel_timeout(struct io_kiocb *req)
{
struct io_timeout_data *io = req->async_data;
@@ -1772,7 +1848,7 @@ static bool io_link_cancel_timeout(struct io_kiocb *req)
io_cqring_fill_event(req, -ECANCELED);
io_commit_cqring(ctx);
req->flags &= ~REQ_F_LINK_HEAD;
- io_put_req(req);
+ io_put_req_deferred(req, 1);
return true;
}
@@ -1791,7 +1867,6 @@ static bool __io_kill_linked_timeout(struct io_kiocb *req)
return false;
list_del_init(&link->link_list);
- link->flags |= REQ_F_COMP_LOCKED;
wake_ev = io_link_cancel_timeout(link);
req->flags &= ~REQ_F_LINK_TIMEOUT;
return wake_ev;
@@ -1800,17 +1875,12 @@ static bool __io_kill_linked_timeout(struct io_kiocb *req)
static void io_kill_linked_timeout(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
+ unsigned long flags;
bool wake_ev;
- if (!(req->flags & REQ_F_COMP_LOCKED)) {
- unsigned long flags;
-
- spin_lock_irqsave(&ctx->completion_lock, flags);
- wake_ev = __io_kill_linked_timeout(req);
- spin_unlock_irqrestore(&ctx->completion_lock, flags);
- } else {
- wake_ev = __io_kill_linked_timeout(req);
- }
+ spin_lock_irqsave(&ctx->completion_lock, flags);
+ wake_ev = __io_kill_linked_timeout(req);
+ spin_unlock_irqrestore(&ctx->completion_lock, flags);
if (wake_ev)
io_cqring_ev_posted(ctx);
@@ -1850,28 +1920,29 @@ static void __io_fail_links(struct io_kiocb *req)
trace_io_uring_fail_link(req, link);
io_cqring_fill_event(link, -ECANCELED);
- link->flags |= REQ_F_COMP_LOCKED;
- __io_double_put_req(link);
- req->flags &= ~REQ_F_LINK_TIMEOUT;
+
+ /*
+ * It's ok to free under spinlock as they're not linked anymore,
+ * but avoid REQ_F_WORK_INITIALIZED because it may deadlock on
+ * work.fs->lock.
+ */
+ if (link->flags & REQ_F_WORK_INITIALIZED)
+ io_put_req_deferred(link, 2);
+ else
+ io_double_put_req(link);
}
io_commit_cqring(ctx);
- io_cqring_ev_posted(ctx);
}
static void io_fail_links(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
+ unsigned long flags;
- if (!(req->flags & REQ_F_COMP_LOCKED)) {
- unsigned long flags;
-
- spin_lock_irqsave(&ctx->completion_lock, flags);
- __io_fail_links(req);
- spin_unlock_irqrestore(&ctx->completion_lock, flags);
- } else {
- __io_fail_links(req);
- }
+ spin_lock_irqsave(&ctx->completion_lock, flags);
+ __io_fail_links(req);
+ spin_unlock_irqrestore(&ctx->completion_lock, flags);
io_cqring_ev_posted(ctx);
}
@@ -2033,7 +2104,9 @@ static void io_req_free_batch_finish(struct io_ring_ctx *ctx,
if (rb->to_free)
__io_req_free_batch_flush(ctx, rb);
if (rb->task) {
- atomic_long_add(rb->task_refs, &rb->task->io_uring->req_complete);
+ struct io_uring_task *tctx = rb->task->io_uring;
+
+ percpu_counter_sub(&tctx->inflight, rb->task_refs);
put_task_struct_many(rb->task, rb->task_refs);
rb->task = NULL;
}
@@ -2050,7 +2123,9 @@ static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req)
if (req->task != rb->task) {
if (rb->task) {
- atomic_long_add(rb->task_refs, &rb->task->io_uring->req_complete);
+ struct io_uring_task *tctx = rb->task->io_uring;
+
+ percpu_counter_sub(&tctx->inflight, rb->task_refs);
put_task_struct_many(rb->task, rb->task_refs);
}
rb->task = req->task;
@@ -2058,7 +2133,7 @@ static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req)
}
rb->task_refs++;
- WARN_ON_ONCE(io_dismantle_req(req));
+ io_dismantle_req(req);
rb->reqs[rb->to_free++] = req;
if (unlikely(rb->to_free == ARRAY_SIZE(rb->reqs)))
__io_req_free_batch_flush(req->ctx, rb);
@@ -2085,6 +2160,34 @@ static void io_put_req(struct io_kiocb *req)
io_free_req(req);
}
+static void io_put_req_deferred_cb(struct callback_head *cb)
+{
+ struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
+
+ io_free_req(req);
+}
+
+static void io_free_req_deferred(struct io_kiocb *req)
+{
+ int ret;
+
+ init_task_work(&req->task_work, io_put_req_deferred_cb);
+ ret = io_req_task_work_add(req, true);
+ if (unlikely(ret)) {
+ struct task_struct *tsk;
+
+ tsk = io_wq_get_task(req->ctx->io_wq);
+ task_work_add(tsk, &req->task_work, 0);
+ wake_up_process(tsk);
+ }
+}
+
+static inline void io_put_req_deferred(struct io_kiocb *req, int refs)
+{
+ if (refcount_sub_and_test(refs, &req->refs))
+ io_free_req_deferred(req);
+}
+
static struct io_wq_work *io_steal_work(struct io_kiocb *req)
{
struct io_kiocb *nxt;
@@ -2101,17 +2204,6 @@ static struct io_wq_work *io_steal_work(struct io_kiocb *req)
return nxt ? &nxt->work : NULL;
}
-/*
- * Must only be used if we don't need to care about links, usually from
- * within the completion handling itself.
- */
-static void __io_double_put_req(struct io_kiocb *req)
-{
- /* drop both submit and complete references */
- if (refcount_sub_and_test(2, &req->refs))
- __io_free_req(req);
-}
-
static void io_double_put_req(struct io_kiocb *req)
{
/* drop both submit and complete references */
@@ -2601,7 +2693,7 @@ static struct file *__io_file_get(struct io_submit_state *state, int fd)
static bool io_bdev_nowait(struct block_device *bdev)
{
#ifdef CONFIG_BLOCK
- return !bdev || queue_is_mq(bdev_get_queue(bdev));
+ return !bdev || blk_queue_nowait(bdev_get_queue(bdev));
#else
return true;
#endif
@@ -3989,7 +4081,7 @@ static int io_madvise(struct io_kiocb *req, bool force_nonblock)
if (force_nonblock)
return -EAGAIN;
- ret = do_madvise(ma->addr, ma->len, ma->advice);
+ ret = do_madvise(current->mm, ma->addr, ma->len, ma->advice);
if (ret < 0)
req_set_fail_links(req);
io_req_complete(req, ret);
@@ -4123,7 +4215,7 @@ static int io_close(struct io_kiocb *req, bool force_nonblock,
}
/* No ->flush() or already async, safely close from here */
- ret = filp_close(close->put_file, req->work.files);
+ ret = filp_close(close->put_file, req->work.identity->files);
if (ret < 0)
req_set_fail_links(req);
fput(close->put_file);
@@ -4845,10 +4937,9 @@ static void io_poll_task_handler(struct io_kiocb *req, struct io_kiocb **nxt)
hash_del(&req->hash_node);
io_poll_complete(req, req->result, 0);
- req->flags |= REQ_F_COMP_LOCKED;
- *nxt = io_put_req_find_next(req);
spin_unlock_irq(&ctx->completion_lock);
+ *nxt = io_put_req_find_next(req);
io_cqring_ev_posted(ctx);
}
@@ -4917,6 +5008,8 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
* for write). Setup a separate io_poll_iocb if this happens.
*/
if (unlikely(poll->head)) {
+ struct io_poll_iocb *poll_one = poll;
+
/* already have a 2nd entry, fail a third attempt */
if (*poll_ptr) {
pt->error = -EINVAL;
@@ -4927,7 +5020,7 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
pt->error = -ENOMEM;
return;
}
- io_init_poll_iocb(poll, req->poll.events, io_poll_double_wake);
+ io_init_poll_iocb(poll, poll_one->events, io_poll_double_wake);
refcount_inc(&req->refs);
poll->wait.private = req;
*poll_ptr = poll;
@@ -5144,9 +5237,8 @@ static bool io_poll_remove_one(struct io_kiocb *req)
if (do_complete) {
io_cqring_fill_event(req, -ECANCELED);
io_commit_cqring(req->ctx);
- req->flags |= REQ_F_COMP_LOCKED;
req_set_fail_links(req);
- io_put_req(req);
+ io_put_req_deferred(req, 1);
}
return do_complete;
@@ -5328,9 +5420,8 @@ static int __io_timeout_cancel(struct io_kiocb *req)
list_del_init(&req->timeout.list);
req_set_fail_links(req);
- req->flags |= REQ_F_COMP_LOCKED;
io_cqring_fill_event(req, -ECANCELED);
- io_put_req(req);
+ io_put_req_deferred(req, 1);
return 0;
}
@@ -5740,9 +5831,9 @@ static void io_req_drop_files(struct io_kiocb *req)
wake_up(&ctx->inflight_wait);
spin_unlock_irqrestore(&ctx->inflight_lock, flags);
req->flags &= ~REQ_F_INFLIGHT;
- put_files_struct(req->work.files);
- put_nsproxy(req->work.nsproxy);
- req->work.files = NULL;
+ put_files_struct(req->work.identity->files);
+ put_nsproxy(req->work.identity->nsproxy);
+ req->work.flags &= ~IO_WQ_WORK_FILES;
}
static void __io_clean_op(struct io_kiocb *req)
@@ -6100,14 +6191,15 @@ static void __io_queue_sqe(struct io_kiocb *req, struct io_comp_state *cs)
again:
linked_timeout = io_prep_linked_timeout(req);
- if ((req->flags & REQ_F_WORK_INITIALIZED) && req->work.creds &&
- req->work.creds != current_cred()) {
+ if ((req->flags & REQ_F_WORK_INITIALIZED) && req->work.identity->creds &&
+ req->work.identity->creds != current_cred()) {
if (old_creds)
revert_creds(old_creds);
- if (old_creds == req->work.creds)
+ if (old_creds == req->work.identity->creds)
old_creds = NULL; /* restored original creds */
else
- old_creds = override_creds(req->work.creds);
+ old_creds = override_creds(req->work.identity->creds);
+ req->work.flags |= IO_WQ_WORK_CREDS;
}
ret = io_issue_sqe(req, true, cs);
@@ -6410,11 +6502,17 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
id = READ_ONCE(sqe->personality);
if (id) {
+ struct io_identity *iod;
+
io_req_init_async(req);
- req->work.creds = idr_find(&ctx->personality_idr, id);
- if (unlikely(!req->work.creds))
+ iod = idr_find(&ctx->personality_idr, id);
+ if (unlikely(!iod))
return -EINVAL;
- get_cred(req->work.creds);
+ refcount_inc(&iod->count);
+ io_put_identity(current->io_uring, req);
+ get_cred(iod->creds);
+ req->work.identity = iod;
+ req->work.flags |= IO_WQ_WORK_CREDS;
}
/* same numerical values with corresponding REQ_F_*, safe to copy */
@@ -6447,7 +6545,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
if (!percpu_ref_tryget_many(&ctx->refs, nr))
return -EAGAIN;
- atomic_long_add(nr, &current->io_uring->req_issue);
+ percpu_counter_add(&current->io_uring->inflight, nr);
refcount_add(nr, &current->usage);
io_submit_state_start(&state, ctx, nr);
@@ -6489,10 +6587,12 @@ fail_req:
if (unlikely(submitted != nr)) {
int ref_used = (submitted == -EAGAIN) ? 0 : submitted;
+ struct io_uring_task *tctx = current->io_uring;
+ int unused = nr - ref_used;
- percpu_ref_put_many(&ctx->refs, nr - ref_used);
- atomic_long_sub(nr - ref_used, &current->io_uring->req_issue);
- put_task_struct_many(current, nr - ref_used);
+ percpu_ref_put_many(&ctx->refs, unused);
+ percpu_counter_sub(&tctx->inflight, unused);
+ put_task_struct_many(current, unused);
}
if (link)
io_queue_link_head(link, &state.comp);
@@ -6672,6 +6772,10 @@ static int io_sq_thread(void *data)
old_cred = override_creds(ctx->creds);
}
io_sq_thread_associate_blkcg(ctx, &cur_css);
+#ifdef CONFIG_AUDIT
+ current->loginuid = ctx->loginuid;
+ current->sessionid = ctx->sessionid;
+#endif
ret |= __io_sq_thread(ctx, start_jiffies, cap_entries);
@@ -7306,7 +7410,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
spin_lock_init(&file_data->lock);
nr_tables = DIV_ROUND_UP(nr_args, IORING_MAX_FILES_TABLE);
- file_data->table = kcalloc(nr_tables, sizeof(file_data->table),
+ file_data->table = kcalloc(nr_tables, sizeof(*file_data->table),
GFP_KERNEL);
if (!file_data->table)
goto out_free;
@@ -7317,6 +7421,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
if (io_sqe_alloc_file_tables(file_data, nr_tables, nr_args))
goto out_ref;
+ ctx->file_data = file_data;
for (i = 0; i < nr_args; i++, ctx->nr_user_files++) {
struct fixed_file_table *table;
@@ -7351,7 +7456,6 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
table->files[index] = file;
}
- ctx->file_data = file_data;
ret = io_sqe_files_scm(ctx);
if (ret) {
io_sqe_files_unregister(ctx);
@@ -7384,6 +7488,7 @@ out_ref:
out_free:
kfree(file_data->table);
kfree(file_data);
+ ctx->file_data = NULL;
return ret;
}
@@ -7609,17 +7714,24 @@ out_fput:
static int io_uring_alloc_task_context(struct task_struct *task)
{
struct io_uring_task *tctx;
+ int ret;
tctx = kmalloc(sizeof(*tctx), GFP_KERNEL);
if (unlikely(!tctx))
return -ENOMEM;
+ ret = percpu_counter_init(&tctx->inflight, 0, GFP_KERNEL);
+ if (unlikely(ret)) {
+ kfree(tctx);
+ return ret;
+ }
+
xa_init(&tctx->xa);
init_waitqueue_head(&tctx->wait);
tctx->last = NULL;
tctx->in_idle = 0;
- atomic_long_set(&tctx->req_issue, 0);
- atomic_long_set(&tctx->req_complete, 0);
+ io_init_identity(&tctx->__identity);
+ tctx->identity = &tctx->__identity;
task->io_uring = tctx;
return 0;
}
@@ -7629,6 +7741,10 @@ void __io_uring_free(struct task_struct *tsk)
struct io_uring_task *tctx = tsk->io_uring;
WARN_ON_ONCE(!xa_empty(&tctx->xa));
+ WARN_ON_ONCE(refcount_read(&tctx->identity->count) != 1);
+ if (tctx->identity != &tctx->__identity)
+ kfree(tctx->identity);
+ percpu_counter_destroy(&tctx->inflight);
kfree(tctx);
tsk->io_uring = NULL;
}
@@ -8205,11 +8321,14 @@ static int io_uring_fasync(int fd, struct file *file, int on)
static int io_remove_personalities(int id, void *p, void *data)
{
struct io_ring_ctx *ctx = data;
- const struct cred *cred;
+ struct io_identity *iod;
- cred = idr_remove(&ctx->personality_idr, id);
- if (cred)
- put_cred(cred);
+ iod = idr_remove(&ctx->personality_idr, id);
+ if (iod) {
+ put_cred(iod->creds);
+ if (refcount_dec_and_test(&iod->count))
+ kfree(iod);
+ }
return 0;
}
@@ -8281,7 +8400,8 @@ static bool io_wq_files_match(struct io_wq_work *work, void *data)
{
struct files_struct *files = data;
- return !files || work->files == files;
+ return !files || ((work->flags & IO_WQ_WORK_FILES) &&
+ work->identity->files == files);
}
/*
@@ -8436,7 +8556,8 @@ static bool io_uring_cancel_files(struct io_ring_ctx *ctx,
spin_lock_irq(&ctx->inflight_lock);
list_for_each_entry(req, &ctx->inflight_list, inflight_entry) {
- if (files && req->work.files != files)
+ if (files && (req->work.flags & IO_WQ_WORK_FILES) &&
+ req->work.identity->files != files)
continue;
/* req is being completed, ignore */
if (!refcount_inc_not_zero(&req->refs))
@@ -8608,12 +8729,6 @@ void __io_uring_files_cancel(struct files_struct *files)
}
}
-static inline bool io_uring_task_idle(struct io_uring_task *tctx)
-{
- return atomic_long_read(&tctx->req_issue) ==
- atomic_long_read(&tctx->req_complete);
-}
-
/*
* Find any io_uring fd that this task has registered or done IO on, and cancel
* requests.
@@ -8622,14 +8737,16 @@ void __io_uring_task_cancel(void)
{
struct io_uring_task *tctx = current->io_uring;
DEFINE_WAIT(wait);
- long completions;
+ s64 inflight;
/* make sure overflow events are dropped */
tctx->in_idle = true;
- while (!io_uring_task_idle(tctx)) {
+ do {
/* read completions before cancelations */
- completions = atomic_long_read(&tctx->req_complete);
+ inflight = percpu_counter_sum(&tctx->inflight);
+ if (!inflight)
+ break;
__io_uring_files_cancel(NULL);
prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
@@ -8638,12 +8755,10 @@ void __io_uring_task_cancel(void)
* If we've seen completions, retry. This avoids a race where
* a completion comes in before we did prepare_to_wait().
*/
- if (completions != atomic_long_read(&tctx->req_complete))
+ if (inflight != percpu_counter_sum(&tctx->inflight))
continue;
- if (io_uring_task_idle(tctx))
- break;
schedule();
- }
+ } while (1);
finish_wait(&tctx->wait, &wait);
tctx->in_idle = false;
@@ -9109,7 +9224,10 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
ctx->compat = in_compat_syscall();
ctx->user = user;
ctx->creds = get_current_cred();
-
+#ifdef CONFIG_AUDIT
+ ctx->loginuid = current->loginuid;
+ ctx->sessionid = current->sessionid;
+#endif
ctx->sqo_task = get_task_struct(current);
/*
@@ -9277,23 +9395,33 @@ out:
static int io_register_personality(struct io_ring_ctx *ctx)
{
- const struct cred *creds = get_current_cred();
- int id;
+ struct io_identity *id;
+ int ret;
- id = idr_alloc_cyclic(&ctx->personality_idr, (void *) creds, 1,
- USHRT_MAX, GFP_KERNEL);
- if (id < 0)
- put_cred(creds);
- return id;
+ id = kmalloc(sizeof(*id), GFP_KERNEL);
+ if (unlikely(!id))
+ return -ENOMEM;
+
+ io_init_identity(id);
+ id->creds = get_current_cred();
+
+ ret = idr_alloc_cyclic(&ctx->personality_idr, id, 1, USHRT_MAX, GFP_KERNEL);
+ if (ret < 0) {
+ put_cred(id->creds);
+ kfree(id);
+ }
+ return ret;
}
static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
{
- const struct cred *old_creds;
+ struct io_identity *iod;
- old_creds = idr_remove(&ctx->personality_idr, id);
- if (old_creds) {
- put_cred(old_creds);
+ iod = idr_remove(&ctx->personality_idr, id);
+ if (iod) {
+ put_cred(iod->creds);
+ if (refcount_dec_and_test(&iod->count))
+ kfree(iod);
return 0;
}
diff --git a/fs/libfs.c b/fs/libfs.c
index e0d42e977d9a..fc34361c1489 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -20,6 +20,8 @@
#include <linux/fs_context.h>
#include <linux/pseudo_fs.h>
#include <linux/fsnotify.h>
+#include <linux/unicode.h>
+#include <linux/fscrypt.h>
#include <linux/uaccess.h>
@@ -1363,3 +1365,88 @@ bool is_empty_dir_inode(struct inode *inode)
return (inode->i_fop == &empty_dir_operations) &&
(inode->i_op == &empty_dir_inode_operations);
}
+
+#ifdef CONFIG_UNICODE
+/*
+ * Determine if the name of a dentry should be casefolded.
+ *
+ * Return: if names will need casefolding
+ */
+static bool needs_casefold(const struct inode *dir)
+{
+ return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding;
+}
+
+/**
+ * generic_ci_d_compare - generic d_compare implementation for casefolding filesystems
+ * @dentry: dentry whose name we are checking against
+ * @len: len of name of dentry
+ * @str: str pointer to name of dentry
+ * @name: Name to compare against
+ *
+ * Return: 0 if names match, 1 if mismatch, or -ERRNO
+ */
+int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
+ const char *str, const struct qstr *name)
+{
+ const struct dentry *parent = READ_ONCE(dentry->d_parent);
+ const struct inode *dir = READ_ONCE(parent->d_inode);
+ const struct super_block *sb = dentry->d_sb;
+ const struct unicode_map *um = sb->s_encoding;
+ struct qstr qstr = QSTR_INIT(str, len);
+ char strbuf[DNAME_INLINE_LEN];
+ int ret;
+
+ if (!dir || !needs_casefold(dir))
+ goto fallback;
+ /*
+ * If the dentry name is stored in-line, then it may be concurrently
+ * modified by a rename. If this happens, the VFS will eventually retry
+ * the lookup, so it doesn't matter what ->d_compare() returns.
+ * However, it's unsafe to call utf8_strncasecmp() with an unstable
+ * string. Therefore, we have to copy the name into a temporary buffer.
+ */
+ if (len <= DNAME_INLINE_LEN - 1) {
+ memcpy(strbuf, str, len);
+ strbuf[len] = 0;
+ qstr.name = strbuf;
+ /* prevent compiler from optimizing out the temporary buffer */
+ barrier();
+ }
+ ret = utf8_strncasecmp(um, name, &qstr);
+ if (ret >= 0)
+ return ret;
+
+ if (sb_has_strict_encoding(sb))
+ return -EINVAL;
+fallback:
+ if (len != name->len)
+ return 1;
+ return !!memcmp(str, name->name, len);
+}
+EXPORT_SYMBOL(generic_ci_d_compare);
+
+/**
+ * generic_ci_d_hash - generic d_hash implementation for casefolding filesystems
+ * @dentry: dentry of the parent directory
+ * @str: qstr of name whose hash we should fill in
+ *
+ * Return: 0 if hash was successful or unchanged, and -EINVAL on error
+ */
+int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
+{
+ const struct inode *dir = READ_ONCE(dentry->d_inode);
+ struct super_block *sb = dentry->d_sb;
+ const struct unicode_map *um = sb->s_encoding;
+ int ret = 0;
+
+ if (!dir || !needs_casefold(dir))
+ return 0;
+
+ ret = utf8_casefold_hash(um, dentry, str);
+ if (ret < 0 && sb_has_strict_encoding(sb))
+ return -EINVAL;
+ return 0;
+}
+EXPORT_SYMBOL(generic_ci_d_hash);
+#endif
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 1eabd91870e6..1d9488cf0534 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -417,7 +417,7 @@ void nsm_release(struct nsm_handle *nsm)
/*
* XDR functions for NSM.
*
- * See http://www.opengroup.org/ for details on the Network
+ * See https://www.opengroup.org/ for details on the Network
* Status Monitor wire protocol.
*/
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 222afba70bc0..29ec8b09a52d 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -94,6 +94,7 @@ enum {
static const struct constant_table nfs_param_enums_local_lock[] = {
{ "all", Opt_local_lock_all },
{ "flock", Opt_local_lock_flock },
+ { "posix", Opt_local_lock_posix },
{ "none", Opt_local_lock_none },
{}
};
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 6b063227e34e..2bcbe38afe2e 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -32,9 +32,9 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
/*
* nfs_path - reconstruct the path given an arbitrary dentry
* @base - used to return pointer to the end of devname part of path
- * @dentry - pointer to dentry
+ * @dentry_in - pointer to dentry
* @buffer - result buffer
- * @buflen - length of buffer
+ * @buflen_in - length of buffer
* @flags - options (see below)
*
* Helper function for constructing the server pathname
@@ -49,15 +49,19 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
* the original device (export) name
* (if unset, the original name is returned verbatim)
*/
-char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen,
- unsigned flags)
+char *nfs_path(char **p, struct dentry *dentry_in, char *buffer,
+ ssize_t buflen_in, unsigned flags)
{
char *end;
int namelen;
unsigned seq;
const char *base;
+ struct dentry *dentry;
+ ssize_t buflen;
rename_retry:
+ buflen = buflen_in;
+ dentry = dentry_in;
end = buffer+buflen;
*--end = '\0';
buflen--;
diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c
index 86777996cfec..b51424ff8159 100644
--- a/fs/nfs/nfs42xattr.c
+++ b/fs/nfs/nfs42xattr.c
@@ -67,7 +67,6 @@ struct nfs4_xattr_bucket {
struct nfs4_xattr_cache {
struct kref ref;
- spinlock_t hash_lock; /* protects hashtable and lru */
struct nfs4_xattr_bucket buckets[NFS4_XATTR_HASH_SIZE];
struct list_head lru;
struct list_head dispose;
@@ -882,7 +881,7 @@ nfs4_xattr_cache_count(struct shrinker *shrink, struct shrink_control *sc)
{
unsigned long count;
- count = list_lru_count(&nfs4_xattr_cache_lru);
+ count = list_lru_shrink_count(&nfs4_xattr_cache_lru, sc);
return vfs_pressure_ratio(count);
}
@@ -976,7 +975,7 @@ nfs4_xattr_entry_count(struct shrinker *shrink, struct shrink_control *sc)
lru = (shrink == &nfs4_xattr_large_entry_shrinker) ?
&nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;
- count = list_lru_count(lru);
+ count = list_lru_shrink_count(lru, sc);
return vfs_pressure_ratio(count);
}
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index cc50085e151c..0dc31ad2362e 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -45,6 +45,15 @@
#define encode_deallocate_maxsz (op_encode_hdr_maxsz + \
encode_fallocate_maxsz)
#define decode_deallocate_maxsz (op_decode_hdr_maxsz)
+#define encode_read_plus_maxsz (op_encode_hdr_maxsz + \
+ encode_stateid_maxsz + 3)
+#define NFS42_READ_PLUS_SEGMENT_SIZE (1 /* data_content4 */ + \
+ 2 /* data_info4.di_offset */ + \
+ 2 /* data_info4.di_length */)
+#define decode_read_plus_maxsz (op_decode_hdr_maxsz + \
+ 1 /* rpr_eof */ + \
+ 1 /* rpr_contents count */ + \
+ 2 * NFS42_READ_PLUS_SEGMENT_SIZE)
#define encode_seek_maxsz (op_encode_hdr_maxsz + \
encode_stateid_maxsz + \
2 /* offset */ + \
@@ -128,6 +137,14 @@
decode_putfh_maxsz + \
decode_deallocate_maxsz + \
decode_getattr_maxsz)
+#define NFS4_enc_read_plus_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_read_plus_maxsz)
+#define NFS4_dec_read_plus_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_read_plus_maxsz)
#define NFS4_enc_seek_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
@@ -324,6 +341,16 @@ static void encode_deallocate(struct xdr_stream *xdr,
encode_fallocate(xdr, args);
}
+static void encode_read_plus(struct xdr_stream *xdr,
+ const struct nfs_pgio_args *args,
+ struct compound_hdr *hdr)
+{
+ encode_op_hdr(xdr, OP_READ_PLUS, decode_read_plus_maxsz, hdr);
+ encode_nfs4_stateid(xdr, &args->stateid);
+ encode_uint64(xdr, args->offset);
+ encode_uint32(xdr, args->count);
+}
+
static void encode_seek(struct xdr_stream *xdr,
const struct nfs42_seek_args *args,
struct compound_hdr *hdr)
@@ -723,6 +750,28 @@ static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req,
}
/*
+ * Encode READ_PLUS request
+ */
+static void nfs4_xdr_enc_read_plus(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const void *data)
+{
+ const struct nfs_pgio_args *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_read_plus(xdr, args, &hdr);
+
+ rpc_prepare_reply_pages(req, args->pages, args->pgbase,
+ args->count, hdr.replen);
+ encode_nops(&hdr);
+}
+
+/*
* Encode SEEK request
*/
static void nfs4_xdr_enc_seek(struct rpc_rqst *req,
@@ -970,6 +1019,97 @@ static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *re
return decode_op_hdr(xdr, OP_DEALLOCATE);
}
+static int decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_res *res,
+ uint32_t *eof)
+{
+ uint32_t count, recvd;
+ uint64_t offset;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 8 + 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ p = xdr_decode_hyper(p, &offset);
+ count = be32_to_cpup(p);
+ recvd = xdr_align_data(xdr, res->count, count);
+ res->count += recvd;
+
+ if (count > recvd) {
+ dprintk("NFS: server cheating in read reply: "
+ "count %u > recvd %u\n", count, recvd);
+ *eof = 0;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int decode_read_plus_hole(struct xdr_stream *xdr, struct nfs_pgio_res *res,
+ uint32_t *eof)
+{
+ uint64_t offset, length, recvd;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 8 + 8);
+ if (unlikely(!p))
+ return -EIO;
+
+ p = xdr_decode_hyper(p, &offset);
+ p = xdr_decode_hyper(p, &length);
+ recvd = xdr_expand_hole(xdr, res->count, length);
+ res->count += recvd;
+
+ if (recvd < length) {
+ *eof = 0;
+ return 1;
+ }
+ return 0;
+}
+
+static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
+{
+ uint32_t eof, segments, type;
+ int status, i;
+ __be32 *p;
+
+ status = decode_op_hdr(xdr, OP_READ_PLUS);
+ if (status)
+ return status;
+
+ p = xdr_inline_decode(xdr, 4 + 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ eof = be32_to_cpup(p++);
+ segments = be32_to_cpup(p++);
+ if (segments == 0)
+ goto out;
+
+ for (i = 0; i < segments; i++) {
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ type = be32_to_cpup(p++);
+ if (type == NFS4_CONTENT_DATA)
+ status = decode_read_plus_data(xdr, res, &eof);
+ else if (type == NFS4_CONTENT_HOLE)
+ status = decode_read_plus_hole(xdr, res, &eof);
+ else
+ return -EINVAL;
+
+ if (status < 0)
+ return status;
+ if (status > 0)
+ break;
+ }
+
+out:
+ res->eof = eof;
+ return 0;
+}
+
static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res)
{
int status;
@@ -1147,6 +1287,33 @@ out:
}
/*
+ * Decode READ_PLUS request
+ */
+static int nfs4_xdr_dec_read_plus(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfs_pgio_res *res = data;
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ status = decode_read_plus(xdr, res);
+ if (!status)
+ status = res->count;
+out:
+ return status;
+}
+
+/*
* Decode SEEK request
*/
static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 0c9505dc852c..065cb04222a1 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -599,6 +599,14 @@ static inline bool nfs4_stateid_is_newer(const nfs4_stateid *s1, const nfs4_stat
return (s32)(be32_to_cpu(s1->seqid) - be32_to_cpu(s2->seqid)) > 0;
}
+static inline bool nfs4_stateid_is_next(const nfs4_stateid *s1, const nfs4_stateid *s2)
+{
+ u32 seq1 = be32_to_cpu(s1->seqid);
+ u32 seq2 = be32_to_cpu(s2->seqid);
+
+ return seq2 == seq1 + 1U || (seq2 == 1U && seq1 == 0xffffffffU);
+}
+
static inline bool nfs4_stateid_match_or_older(const nfs4_stateid *dst, const nfs4_stateid *src)
{
return nfs4_stateid_match_other(dst, src) &&
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index daacc78a3d48..be7915c861ce 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -1045,6 +1045,8 @@ static int nfs4_server_common_setup(struct nfs_server *server,
server->caps |= server->nfs_client->cl_mvops->init_caps;
if (server->flags & NFS_MOUNT_NORDIRPLUS)
server->caps &= ~NFS_CAP_READDIRPLUS;
+ if (server->nfs_client->cl_proto == XPRT_TRANSPORT_RDMA)
+ server->caps &= ~NFS_CAP_READ_PLUS;
/*
* Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
* authentication.
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index fdfc77486ace..91be7f628e4a 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -145,7 +145,8 @@ static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
/* Only offload copy if superblock is the same */
if (file_in->f_op != &nfs4_file_operations)
return -EXDEV;
- if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY))
+ if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY) ||
+ !nfs_server_capable(file_inode(file_in), NFS_CAP_COPY))
return -EOPNOTSUPP;
if (file_inode(file_in) == file_inode(file_out))
return -EOPNOTSUPP;
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index 62e6eea5c516..8d8aba305ecc 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -46,6 +46,7 @@
#include <keys/user-type.h>
#include <keys/request_key_auth-type.h>
#include <linux/module.h>
+#include <linux/user_namespace.h>
#include "internal.h"
#include "netns.h"
@@ -69,13 +70,13 @@ struct idmap {
struct rpc_pipe *idmap_pipe;
struct idmap_legacy_upcalldata *idmap_upcall_data;
struct mutex idmap_mutex;
- const struct cred *cred;
+ struct user_namespace *user_ns;
};
static struct user_namespace *idmap_userns(const struct idmap *idmap)
{
- if (idmap && idmap->cred)
- return idmap->cred->user_ns;
+ if (idmap && idmap->user_ns)
+ return idmap->user_ns;
return &init_user_ns;
}
@@ -286,7 +287,7 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen,
if (ret < 0)
return ERR_PTR(ret);
- if (!idmap->cred || idmap->cred->user_ns == &init_user_ns)
+ if (!idmap->user_ns || idmap->user_ns == &init_user_ns)
rkey = request_key(&key_type_id_resolver, desc, "");
if (IS_ERR(rkey)) {
mutex_lock(&idmap->idmap_mutex);
@@ -462,7 +463,7 @@ nfs_idmap_new(struct nfs_client *clp)
return -ENOMEM;
mutex_init(&idmap->idmap_mutex);
- idmap->cred = get_cred(clp->cl_rpcclient->cl_cred);
+ idmap->user_ns = get_user_ns(clp->cl_rpcclient->cl_cred->user_ns);
rpc_init_pipe_dir_object(&idmap->idmap_pdo,
&nfs_idmap_pipe_dir_object_ops,
@@ -486,7 +487,7 @@ nfs_idmap_new(struct nfs_client *clp)
err_destroy_pipe:
rpc_destroy_pipe_data(idmap->idmap_pipe);
err:
- put_cred(idmap->cred);
+ get_user_ns(idmap->user_ns);
kfree(idmap);
return error;
}
@@ -503,7 +504,7 @@ nfs_idmap_delete(struct nfs_client *clp)
&clp->cl_rpcclient->cl_pipedir_objects,
&idmap->idmap_pdo);
rpc_destroy_pipe_data(idmap->idmap_pipe);
- put_cred(idmap->cred);
+ put_user_ns(idmap->user_ns);
kfree(idmap);
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6e95c85fe395..9e0ca9b2b210 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -63,6 +63,7 @@
#include "callback.h"
#include "pnfs.h"
#include "netns.h"
+#include "sysfs.h"
#include "nfs4idmap.h"
#include "nfs4session.h"
#include "fscache.h"
@@ -70,6 +71,10 @@
#include "nfs4trace.h"
+#ifdef CONFIG_NFS_V4_2
+#include "nfs42.h"
+#endif /* CONFIG_NFS_V4_2 */
+
#define NFSDBG_FACILITY NFSDBG_PROC
#define NFS4_BITMASK_SZ 3
@@ -107,6 +112,9 @@ static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *,
static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *,
const struct cred *, bool);
#endif
+static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode,
+ struct nfs_server *server,
+ struct nfs4_label *label);
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
static inline struct nfs4_label *
@@ -1547,19 +1555,6 @@ static void nfs_state_log_update_open_stateid(struct nfs4_state *state)
wake_up_all(&state->waitq);
}
-static void nfs_state_log_out_of_order_open_stateid(struct nfs4_state *state,
- const nfs4_stateid *stateid)
-{
- u32 state_seqid = be32_to_cpu(state->open_stateid.seqid);
- u32 stateid_seqid = be32_to_cpu(stateid->seqid);
-
- if (stateid_seqid == state_seqid + 1U ||
- (stateid_seqid == 1U && state_seqid == 0xffffffffU))
- nfs_state_log_update_open_stateid(state);
- else
- set_bit(NFS_STATE_CHANGE_WAIT, &state->flags);
-}
-
static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state)
{
struct nfs_client *clp = state->owner->so_server->nfs_client;
@@ -1585,21 +1580,19 @@ static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state)
* i.e. The stateid seqids have to be initialised to 1, and
* are then incremented on every state transition.
*/
-static bool nfs_need_update_open_stateid(struct nfs4_state *state,
+static bool nfs_stateid_is_sequential(struct nfs4_state *state,
const nfs4_stateid *stateid)
{
- if (test_bit(NFS_OPEN_STATE, &state->flags) == 0 ||
- !nfs4_stateid_match_other(stateid, &state->open_stateid)) {
+ if (test_bit(NFS_OPEN_STATE, &state->flags)) {
+ /* The common case - we're updating to a new sequence number */
+ if (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
+ nfs4_stateid_is_next(&state->open_stateid, stateid)) {
+ return true;
+ }
+ } else {
+ /* This is the first OPEN in this generation */
if (stateid->seqid == cpu_to_be32(1))
- nfs_state_log_update_open_stateid(state);
- else
- set_bit(NFS_STATE_CHANGE_WAIT, &state->flags);
- return true;
- }
-
- if (nfs4_stateid_is_newer(stateid, &state->open_stateid)) {
- nfs_state_log_out_of_order_open_stateid(state, stateid);
- return true;
+ return true;
}
return false;
}
@@ -1673,16 +1666,16 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state,
int status = 0;
for (;;) {
- if (!nfs_need_update_open_stateid(state, stateid))
- return;
- if (!test_bit(NFS_STATE_CHANGE_WAIT, &state->flags))
+ if (nfs_stateid_is_sequential(state, stateid))
break;
+
if (status)
break;
/* Rely on seqids for serialisation with NFSv4.0 */
if (!nfs4_has_session(NFS_SERVER(state->inode)->nfs_client))
break;
+ set_bit(NFS_STATE_CHANGE_WAIT, &state->flags);
prepare_to_wait(&state->waitq, &wait, TASK_KILLABLE);
/*
* Ensure we process the state changes in the same order
@@ -1693,6 +1686,7 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state,
spin_unlock(&state->owner->so_lock);
rcu_read_unlock();
trace_nfs4_open_stateid_update_wait(state->inode, stateid, 0);
+
if (!signal_pending(current)) {
if (schedule_timeout(5*HZ) == 0)
status = -EAGAIN;
@@ -3435,7 +3429,8 @@ static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst,
__be32 seqid_open;
u32 dst_seqid;
bool ret;
- int seq;
+ int seq, status = -EAGAIN;
+ DEFINE_WAIT(wait);
for (;;) {
ret = false;
@@ -3447,15 +3442,41 @@ static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst,
continue;
break;
}
+
+ write_seqlock(&state->seqlock);
seqid_open = state->open_stateid.seqid;
- if (read_seqretry(&state->seqlock, seq))
- continue;
dst_seqid = be32_to_cpu(dst->seqid);
- if ((s32)(dst_seqid - be32_to_cpu(seqid_open)) >= 0)
- dst->seqid = cpu_to_be32(dst_seqid + 1);
- else
+
+ /* Did another OPEN bump the state's seqid? try again: */
+ if ((s32)(be32_to_cpu(seqid_open) - dst_seqid) > 0) {
dst->seqid = seqid_open;
+ write_sequnlock(&state->seqlock);
+ ret = true;
+ break;
+ }
+
+ /* server says we're behind but we haven't seen the update yet */
+ set_bit(NFS_STATE_CHANGE_WAIT, &state->flags);
+ prepare_to_wait(&state->waitq, &wait, TASK_KILLABLE);
+ write_sequnlock(&state->seqlock);
+ trace_nfs4_close_stateid_update_wait(state->inode, dst, 0);
+
+ if (signal_pending(current))
+ status = -EINTR;
+ else
+ if (schedule_timeout(5*HZ) != 0)
+ status = 0;
+
+ finish_wait(&state->waitq, &wait);
+
+ if (!status)
+ continue;
+ if (status == -EINTR)
+ break;
+
+ /* we slept the whole 5 seconds, we must have lost a seqid */
+ dst->seqid = cpu_to_be32(dst_seqid + 1);
ret = true;
break;
}
@@ -3632,9 +3653,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
if (calldata->arg.fmode == 0 || calldata->arg.fmode == FMODE_READ) {
/* Close-to-open cache consistency revalidation */
- if (!nfs4_have_delegation(inode, FMODE_READ))
+ if (!nfs4_have_delegation(inode, FMODE_READ)) {
calldata->arg.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
- else
+ nfs4_bitmask_adjust(calldata->arg.bitmask, inode, NFS_SERVER(inode), NULL);
+ } else
calldata->arg.bitmask = NULL;
}
@@ -5255,28 +5277,60 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task,
return true;
}
-static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
+static bool nfs4_read_plus_not_supported(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
{
+ struct nfs_server *server = NFS_SERVER(hdr->inode);
+ struct rpc_message *msg = &task->tk_msg;
+ if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS] &&
+ server->caps & NFS_CAP_READ_PLUS && task->tk_status == -ENOTSUPP) {
+ server->caps &= ~NFS_CAP_READ_PLUS;
+ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+ rpc_restart_call_prepare(task);
+ return true;
+ }
+ return false;
+}
+
+static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
+{
dprintk("--> %s\n", __func__);
if (!nfs4_sequence_done(task, &hdr->res.seq_res))
return -EAGAIN;
if (nfs4_read_stateid_changed(task, &hdr->args))
return -EAGAIN;
+ if (nfs4_read_plus_not_supported(task, hdr))
+ return -EAGAIN;
if (task->tk_status > 0)
nfs_invalidate_atime(hdr->inode);
return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) :
nfs4_read_done_cb(task, hdr);
}
+#ifdef CONFIG_NFS_V4_2
+static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg)
+{
+ if (server->caps & NFS_CAP_READ_PLUS)
+ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS];
+ else
+ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+}
+#else
+static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg)
+{
+ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+}
+#endif /* CONFIG_NFS_V4_2 */
+
static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr,
struct rpc_message *msg)
{
hdr->timestamp = jiffies;
if (!hdr->pgio_done_cb)
hdr->pgio_done_cb = nfs4_read_done_cb;
- msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+ nfs42_read_plus_support(NFS_SERVER(hdr->inode), msg);
nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0);
}
@@ -5360,6 +5414,38 @@ bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr)
return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
}
+static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode,
+ struct nfs_server *server,
+ struct nfs4_label *label)
+{
+
+ unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
+
+ if ((cache_validity & NFS_INO_INVALID_DATA) ||
+ (cache_validity & NFS_INO_REVAL_PAGECACHE) ||
+ (cache_validity & NFS_INO_REVAL_FORCED) ||
+ (cache_validity & NFS_INO_INVALID_OTHER))
+ nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode);
+
+ if (cache_validity & NFS_INO_INVALID_ATIME)
+ bitmask[1] |= FATTR4_WORD1_TIME_ACCESS;
+ if (cache_validity & NFS_INO_INVALID_ACCESS)
+ bitmask[0] |= FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER |
+ FATTR4_WORD1_OWNER_GROUP;
+ if (cache_validity & NFS_INO_INVALID_ACL)
+ bitmask[0] |= FATTR4_WORD0_ACL;
+ if (cache_validity & NFS_INO_INVALID_LABEL)
+ bitmask[2] |= FATTR4_WORD2_SECURITY_LABEL;
+ if (cache_validity & NFS_INO_INVALID_CTIME)
+ bitmask[0] |= FATTR4_WORD0_CHANGE;
+ if (cache_validity & NFS_INO_INVALID_MTIME)
+ bitmask[1] |= FATTR4_WORD1_TIME_MODIFY;
+ if (cache_validity & NFS_INO_INVALID_SIZE)
+ bitmask[0] |= FATTR4_WORD0_SIZE;
+ if (cache_validity & NFS_INO_INVALID_BLOCKS)
+ bitmask[1] |= FATTR4_WORD1_SPACE_USED;
+}
+
static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
struct rpc_message *msg,
struct rpc_clnt **clnt)
@@ -5369,8 +5455,10 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
if (!nfs4_write_need_cache_consistency_data(hdr)) {
hdr->args.bitmask = NULL;
hdr->res.fattr = NULL;
- } else
+ } else {
hdr->args.bitmask = server->cache_consistency_bitmask;
+ nfs4_bitmask_adjust(hdr->args.bitmask, hdr->inode, server, NULL);
+ }
if (!hdr->pgio_done_cb)
hdr->pgio_done_cb = nfs4_write_done_cb;
@@ -6006,9 +6094,34 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp,
memcpy(bootverf->data, verf, sizeof(bootverf->data));
}
+static size_t
+nfs4_get_uniquifier(struct nfs_client *clp, char *buf, size_t buflen)
+{
+ struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
+ struct nfs_netns_client *nn_clp = nn->nfs_client;
+ const char *id;
+
+ buf[0] = '\0';
+
+ if (nn_clp) {
+ rcu_read_lock();
+ id = rcu_dereference(nn_clp->identifier);
+ if (id)
+ strscpy(buf, id, buflen);
+ rcu_read_unlock();
+ }
+
+ if (nfs4_client_id_uniquifier[0] != '\0' && buf[0] == '\0')
+ strscpy(buf, nfs4_client_id_uniquifier, buflen);
+
+ return strlen(buf);
+}
+
static int
nfs4_init_nonuniform_client_string(struct nfs_client *clp)
{
+ char buf[NFS4_CLIENT_ID_UNIQ_LEN];
+ size_t buflen;
size_t len;
char *str;
@@ -6022,8 +6135,11 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp)
strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)) +
1;
rcu_read_unlock();
- if (nfs4_client_id_uniquifier[0] != '\0')
- len += strlen(nfs4_client_id_uniquifier) + 1;
+
+ buflen = nfs4_get_uniquifier(clp, buf, sizeof(buf));
+ if (buflen)
+ len += buflen + 1;
+
if (len > NFS4_OPAQUE_LIMIT + 1)
return -EINVAL;
@@ -6037,10 +6153,9 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp)
return -ENOMEM;
rcu_read_lock();
- if (nfs4_client_id_uniquifier[0] != '\0')
+ if (buflen)
scnprintf(str, len, "Linux NFSv4.0 %s/%s/%s",
- clp->cl_rpcclient->cl_nodename,
- nfs4_client_id_uniquifier,
+ clp->cl_rpcclient->cl_nodename, buf,
rpc_peeraddr2str(clp->cl_rpcclient,
RPC_DISPLAY_ADDR));
else
@@ -6055,50 +6170,23 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp)
}
static int
-nfs4_init_uniquifier_client_string(struct nfs_client *clp)
-{
- size_t len;
- char *str;
-
- len = 10 + 10 + 1 + 10 + 1 +
- strlen(nfs4_client_id_uniquifier) + 1 +
- strlen(clp->cl_rpcclient->cl_nodename) + 1;
-
- if (len > NFS4_OPAQUE_LIMIT + 1)
- return -EINVAL;
-
- /*
- * Since this string is allocated at mount time, and held until the
- * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying
- * about a memory-reclaim deadlock.
- */
- str = kmalloc(len, GFP_KERNEL);
- if (!str)
- return -ENOMEM;
-
- scnprintf(str, len, "Linux NFSv%u.%u %s/%s",
- clp->rpc_ops->version, clp->cl_minorversion,
- nfs4_client_id_uniquifier,
- clp->cl_rpcclient->cl_nodename);
- clp->cl_owner_id = str;
- return 0;
-}
-
-static int
nfs4_init_uniform_client_string(struct nfs_client *clp)
{
+ char buf[NFS4_CLIENT_ID_UNIQ_LEN];
+ size_t buflen;
size_t len;
char *str;
if (clp->cl_owner_id != NULL)
return 0;
- if (nfs4_client_id_uniquifier[0] != '\0')
- return nfs4_init_uniquifier_client_string(clp);
-
len = 10 + 10 + 1 + 10 + 1 +
strlen(clp->cl_rpcclient->cl_nodename) + 1;
+ buflen = nfs4_get_uniquifier(clp, buf, sizeof(buf));
+ if (buflen)
+ len += buflen + 1;
+
if (len > NFS4_OPAQUE_LIMIT + 1)
return -EINVAL;
@@ -6111,9 +6199,14 @@ nfs4_init_uniform_client_string(struct nfs_client *clp)
if (!str)
return -ENOMEM;
- scnprintf(str, len, "Linux NFSv%u.%u %s",
- clp->rpc_ops->version, clp->cl_minorversion,
- clp->cl_rpcclient->cl_nodename);
+ if (buflen)
+ scnprintf(str, len, "Linux NFSv%u.%u %s/%s",
+ clp->rpc_ops->version, clp->cl_minorversion,
+ buf, clp->cl_rpcclient->cl_nodename);
+ else
+ scnprintf(str, len, "Linux NFSv%u.%u %s",
+ clp->rpc_ops->version, clp->cl_minorversion,
+ clp->cl_rpcclient->cl_nodename);
clp->cl_owner_id = str;
return 0;
}
@@ -6406,6 +6499,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
data->args.fhandle = &data->fh;
data->args.stateid = &data->stateid;
data->args.bitmask = server->cache_consistency_bitmask;
+ nfs4_bitmask_adjust(data->args.bitmask, inode, server, NULL);
nfs_copy_fh(&data->fh, NFS_FH(inode));
nfs4_stateid_copy(&data->stateid, stateid);
data->res.fattr = &data->fattr;
@@ -7440,7 +7534,7 @@ nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len)
if (nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL)) {
len = security_inode_listsecurity(inode, list, list_len);
- if (list_len && len > list_len)
+ if (len >= 0 && list_len && len > list_len)
return -ERANGE;
}
return len;
@@ -8039,9 +8133,11 @@ int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name,
* both PNFS and NON_PNFS flags set, and not having one of NON_PNFS, PNFS, or
* DS flags set.
*/
-static int nfs4_check_cl_exchange_flags(u32 flags)
+static int nfs4_check_cl_exchange_flags(u32 flags, u32 version)
{
- if (flags & ~EXCHGID4_FLAG_MASK_R)
+ if (version >= 2 && (flags & ~EXCHGID4_2_FLAG_MASK_R))
+ goto out_inval;
+ else if (version < 2 && (flags & ~EXCHGID4_FLAG_MASK_R))
goto out_inval;
if ((flags & EXCHGID4_FLAG_USE_PNFS_MDS) &&
(flags & EXCHGID4_FLAG_USE_NON_PNFS))
@@ -8454,7 +8550,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre
if (status != 0)
goto out;
- status = nfs4_check_cl_exchange_flags(resp->flags);
+ status = nfs4_check_cl_exchange_flags(resp->flags,
+ clp->cl_mvops->minor_version);
if (status != 0)
goto out;
@@ -9693,7 +9790,6 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
.rpc_argp = &args,
.rpc_resp = &res,
};
- struct rpc_clnt *clnt = server->client;
struct nfs4_call_sync_data data = {
.seq_server = server,
.seq_args = &args.seq_args,
@@ -9710,8 +9806,7 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
int status;
if (use_integrity) {
- clnt = server->nfs_client->cl_rpcclient;
- task_setup.rpc_client = clnt;
+ task_setup.rpc_client = server->nfs_client->cl_rpcclient;
cred = nfs4_get_clid_cred(server->nfs_client);
msg.rpc_cred = cred;
@@ -10165,7 +10260,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
| NFS_CAP_SEEK
| NFS_CAP_LAYOUTSTATS
| NFS_CAP_CLONE
- | NFS_CAP_LAYOUTERROR,
+ | NFS_CAP_LAYOUTERROR
+ | NFS_CAP_READ_PLUS,
.init_client = nfs41_init_client,
.shutdown_client = nfs41_shutdown_client,
.match_stateid = nfs41_match_stateid,
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index b4f852d4d099..484c1da96dea 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -1511,6 +1511,7 @@ DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_setattr);
DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_delegreturn);
DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update);
DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update_wait);
+DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_close_stateid_update_wait);
DECLARE_EVENT_CLASS(nfs4_getattr_event,
TP_PROTO(
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 0b3510f62623..c6dbfcae7517 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -5308,7 +5308,6 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
uint32_t attrlen,
bitmap[3] = {0};
int status;
- unsigned int pg_offset;
res->acl_len = 0;
if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
@@ -5316,9 +5315,6 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
xdr_enter_page(xdr, xdr->buf->page_len);
- /* Calculate the offset of the page data */
- pg_offset = xdr->buf->head[0].iov_len;
-
if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
goto out;
if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
@@ -5331,7 +5327,7 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
/* The bitmap (xdr len + bitmaps) and the attr xdr len words
* are stored with the acl data to handle the problem of
* variable length bitmaps.*/
- res->acl_data_offset = xdr_stream_pos(xdr) - pg_offset;
+ res->acl_data_offset = xdr_page_pos(xdr);
res->acl_len = attrlen;
/* Check for receive buffer overflow */
@@ -7619,6 +7615,7 @@ const struct rpc_procinfo nfs4_procedures[] = {
PROC42(SETXATTR, enc_setxattr, dec_setxattr),
PROC42(LISTXATTRS, enc_listxattrs, dec_listxattrs),
PROC42(REMOVEXATTR, enc_removexattr, dec_removexattr),
+ PROC42(READ_PLUS, enc_read_plus, dec_read_plus),
};
static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)];
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 71f7741126b6..0e50b9d45c32 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -902,7 +902,7 @@ restart:
}
/*
- * Called by the state manger to remove all layouts established under an
+ * Called by the state manager to remove all layouts established under an
* expired lease.
*/
void
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f943e37853fa..78c46a517fcf 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -889,7 +889,7 @@ static struct nfs_server *nfs_try_mount_request(struct fs_context *fc)
default:
if (rpcauth_get_gssinfo(flavor, &info) != 0)
continue;
- /* Fallthrough */
+ break;
}
dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", flavor);
ctx->selected_flavor = flavor;
diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
index c489496b5659..8cb70755e3c9 100644
--- a/fs/nfs/sysfs.c
+++ b/fs/nfs/sysfs.c
@@ -79,7 +79,12 @@ static ssize_t nfs_netns_identifier_show(struct kobject *kobj,
struct nfs_netns_client *c = container_of(kobj,
struct nfs_netns_client,
kobject);
- return scnprintf(buf, PAGE_SIZE, "%s\n", c->identifier);
+ ssize_t ret;
+
+ rcu_read_lock();
+ ret = scnprintf(buf, PAGE_SIZE, "%s\n", rcu_dereference(c->identifier));
+ rcu_read_unlock();
+ return ret;
}
/* Strip trailing '\n' */
@@ -107,7 +112,7 @@ static ssize_t nfs_netns_identifier_store(struct kobject *kobj,
p = kmemdup_nul(buf, len, GFP_KERNEL);
if (!p)
return -ENOMEM;
- old = xchg(&c->identifier, p);
+ old = rcu_dereference_protected(xchg(&c->identifier, (char __rcu *)p), 1);
if (old) {
synchronize_rcu();
kfree(old);
@@ -121,7 +126,7 @@ static void nfs_netns_client_release(struct kobject *kobj)
struct nfs_netns_client,
kobject);
- kfree(c->identifier);
+ kfree(rcu_dereference_raw(c->identifier));
kfree(c);
}
diff --git a/fs/nfs/sysfs.h b/fs/nfs/sysfs.h
index ebcbdc40483b..5501ef573c32 100644
--- a/fs/nfs/sysfs.h
+++ b/fs/nfs/sysfs.h
@@ -11,7 +11,7 @@
struct nfs_netns_client {
struct kobject kobject;
struct net *net;
- const char *identifier;
+ const char __rcu *identifier;
};
extern struct kobject *nfs_client_kobj;
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index e516ae389ca5..5900879d5693 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -355,7 +355,7 @@ void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *bmap,
/**
* nilfs_bmap_assign - assign a new block number to a block
* @bmap: bmap
- * @bhp: pointer to buffer head
+ * @bh: pointer to buffer head
* @blocknr: block number
* @binfo: block information
*
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index 86d4d850d130..025fb082575a 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -889,7 +889,7 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno)
* nilfs_cpfile_change_cpmode - change checkpoint mode
* @cpfile: inode of checkpoint file
* @cno: checkpoint number
- * @status: mode of checkpoint
+ * @mode: mode of checkpoint
*
* Description: nilfs_change_cpmode() changes the mode of the checkpoint
* specified by @cno. The mode @mode is NILFS_CHECKPOINT or NILFS_SNAPSHOT.
@@ -930,12 +930,12 @@ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode)
/**
* nilfs_cpfile_get_stat - get checkpoint statistics
* @cpfile: inode of checkpoint file
- * @stat: pointer to a structure of checkpoint statistics
+ * @cpstat: pointer to a structure of checkpoint statistics
*
* Description: nilfs_cpfile_get_stat() returns information about checkpoints.
*
* Return Value: On success, 0 is returned, and checkpoints information is
- * stored in the place pointed by @stat. On error, one of the following
+ * stored in the place pointed by @cpstat. On error, one of the following
* negative error codes is returned.
*
* %-EIO - I/O error.
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index b175f1330408..171fb5cd427f 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -69,7 +69,6 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode,
/**
* nilfs_forget_buffer - discard dirty state
- * @inode: owner inode of the buffer
* @bh: buffer head of the buffer to be discarded
*/
void nilfs_forget_buffer(struct buffer_head *bh)
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 42ff67c0c14f..63722475e17e 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -546,13 +546,13 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
/**
* nilfs_sufile_get_stat - get segment usage statistics
* @sufile: inode of segment usage file
- * @stat: pointer to a structure of segment usage statistics
+ * @sustat: pointer to a structure of segment usage statistics
*
* Description: nilfs_sufile_get_stat() returns information about segment
* usage.
*
* Return Value: On success, 0 is returned, and segment usage information is
- * stored in the place pointed by @stat. On error, one of the following
+ * stored in the place pointed by @sustat. On error, one of the following
* negative error codes is returned.
*
* %-EIO - I/O error.
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index c942910a8649..9167884a61ec 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -531,6 +531,7 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir);
const struct path *path = fsnotify_data_path(data, data_type);
unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
+ struct mem_cgroup *old_memcg;
struct inode *child = NULL;
bool name_event = false;
@@ -580,7 +581,7 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
gfp |= __GFP_RETRY_MAYFAIL;
/* Whoever is interested in the event, pays for the allocation. */
- memalloc_use_memcg(group->memcg);
+ old_memcg = set_active_memcg(group->memcg);
if (fanotify_is_perm_event(mask)) {
event = fanotify_alloc_perm_event(path, gfp);
@@ -608,7 +609,7 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
event->pid = get_pid(task_tgid(current));
out:
- memalloc_unuse_memcg();
+ set_active_memcg(old_memcg);
return event;
}
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index a65cf8c9f600..9ddcbadc98e2 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -66,6 +66,7 @@ static int inotify_one_event(struct fsnotify_group *group, u32 mask,
int ret;
int len = 0;
int alloc_len = sizeof(struct inotify_event_info);
+ struct mem_cgroup *old_memcg;
if ((inode_mark->mask & FS_EXCL_UNLINK) &&
path && d_unlinked(path->dentry))
@@ -87,9 +88,9 @@ static int inotify_one_event(struct fsnotify_group *group, u32 mask,
* trigger OOM killer in the target monitoring memcg as it may have
* security repercussion.
*/
- memalloc_use_memcg(group->memcg);
+ old_memcg = set_active_memcg(group->memcg);
event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
- memalloc_unuse_memcg();
+ set_active_memcg(old_memcg);
if (unlikely(!event)) {
/*
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index d07fb92b7253..955ecd4030f0 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -43,7 +43,8 @@ static bool ovl_must_copy_xattr(const char *name)
!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN);
}
-int ovl_copy_xattr(struct dentry *old, struct dentry *new)
+int ovl_copy_xattr(struct super_block *sb, struct dentry *old,
+ struct dentry *new)
{
ssize_t list_size, size, value_size = 0;
char *buf, *name, *value = NULL;
@@ -81,7 +82,7 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
}
list_size -= slen;
- if (ovl_is_private_xattr(name))
+ if (ovl_is_private_xattr(sb, name))
continue;
retry:
size = vfs_getxattr(old, name, value, value_size);
@@ -128,7 +129,8 @@ out:
return error;
}
-static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
+static int ovl_copy_up_data(struct ovl_fs *ofs, struct path *old,
+ struct path *new, loff_t len)
{
struct file *old_file;
struct file *new_file;
@@ -218,7 +220,7 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
len -= bytes;
}
out:
- if (!error)
+ if (!error && ovl_should_sync(ofs))
error = vfs_fsync(new_file, 0);
fput(new_file);
out_fput:
@@ -354,7 +356,8 @@ int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
}
/* Store file handle of @upper dir in @index dir entry */
-static int ovl_set_upper_fh(struct dentry *upper, struct dentry *index)
+static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper,
+ struct dentry *index)
{
const struct ovl_fh *fh;
int err;
@@ -363,7 +366,7 @@ static int ovl_set_upper_fh(struct dentry *upper, struct dentry *index)
if (IS_ERR(fh))
return PTR_ERR(fh);
- err = ovl_do_setxattr(index, OVL_XATTR_UPPER, fh->buf, fh->fb.len, 0);
+ err = ovl_do_setxattr(ofs, index, OVL_XATTR_UPPER, fh->buf, fh->fb.len);
kfree(fh);
return err;
@@ -408,7 +411,7 @@ static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
if (IS_ERR(temp))
goto free_name;
- err = ovl_set_upper_fh(upper, temp);
+ err = ovl_set_upper_fh(OVL_FS(dentry->d_sb), upper, temp);
if (err)
goto out;
@@ -484,6 +487,7 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
{
+ struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
int err;
/*
@@ -499,12 +503,13 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
upperpath.dentry = temp;
ovl_path_lowerdata(c->dentry, &datapath);
- err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size);
+ err = ovl_copy_up_data(ofs, &datapath, &upperpath,
+ c->stat.size);
if (err)
return err;
}
- err = ovl_copy_xattr(c->lowerpath.dentry, temp);
+ err = ovl_copy_xattr(c->dentry->d_sb, c->lowerpath.dentry, temp);
if (err)
return err;
@@ -781,9 +786,33 @@ static bool ovl_need_meta_copy_up(struct dentry *dentry, umode_t mode,
return true;
}
+static ssize_t ovl_getxattr(struct dentry *dentry, char *name, char **value)
+{
+ ssize_t res;
+ char *buf;
+
+ res = vfs_getxattr(dentry, name, NULL, 0);
+ if (res == -ENODATA || res == -EOPNOTSUPP)
+ res = 0;
+
+ if (res > 0) {
+ buf = kzalloc(res, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ res = vfs_getxattr(dentry, name, buf, res);
+ if (res < 0)
+ kfree(buf);
+ else
+ *value = buf;
+ }
+ return res;
+}
+
/* Copy up data of an inode which was copied up metadata only in the past. */
static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
{
+ struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
struct path upperpath, datapath;
int err;
char *capability = NULL;
@@ -799,12 +828,12 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
if (c->stat.size) {
err = cap_size = ovl_getxattr(upperpath.dentry, XATTR_NAME_CAPS,
- &capability, 0);
- if (err < 0 && err != -ENODATA)
+ &capability);
+ if (cap_size < 0)
goto out;
}
- err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size);
+ err = ovl_copy_up_data(ofs, &datapath, &upperpath, c->stat.size);
if (err)
goto out_free;
@@ -813,14 +842,14 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
* don't want that to happen for normal copy-up operation.
*/
if (capability) {
- err = ovl_do_setxattr(upperpath.dentry, XATTR_NAME_CAPS,
- capability, cap_size, 0);
+ err = vfs_setxattr(upperpath.dentry, XATTR_NAME_CAPS,
+ capability, cap_size, 0);
if (err)
goto out_free;
}
- err = vfs_removexattr(upperpath.dentry, OVL_XATTR_METACOPY);
+ err = ovl_do_removexattr(ofs, upperpath.dentry, OVL_XATTR_METACOPY);
if (err)
goto out_free;
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 1bba4813f9cb..28a075b5f5b2 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -394,7 +394,7 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
if (IS_ERR(opaquedir))
goto out_unlock;
- err = ovl_copy_xattr(upper, opaquedir);
+ err = ovl_copy_xattr(dentry->d_sb, upper, opaquedir);
if (err)
goto out_cleanup;
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 0e696f72cf65..ed35be3fafc6 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -752,7 +752,7 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
goto out_err;
}
if (index) {
- err = ovl_verify_origin(index, origin.dentry, false);
+ err = ovl_verify_origin(ofs, index, origin.dentry, false);
if (err)
goto out_err;
}
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 0d940e29d62b..efccb7c1f9bc 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -136,6 +136,13 @@ static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
static int ovl_real_fdget(const struct file *file, struct fd *real)
{
+ if (d_is_dir(file_dentry(file))) {
+ real->flags = 0;
+ real->file = ovl_dir_real_file(file, false);
+
+ return PTR_ERR_OR_ZERO(real->file);
+ }
+
return ovl_real_fdget_meta(file, real, false);
}
@@ -331,6 +338,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
struct fd real;
const struct cred *old_cred;
ssize_t ret;
+ int ifl = iocb->ki_flags;
if (!iov_iter_count(iter))
return 0;
@@ -346,11 +354,14 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
if (ret)
goto out_unlock;
+ if (!ovl_should_sync(OVL_FS(inode->i_sb)))
+ ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
+
old_cred = ovl_override_creds(file_inode(file)->i_sb);
if (is_sync_kiocb(iocb)) {
file_start_write(real.file);
ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
- ovl_iocb_to_rwf(iocb->ki_flags));
+ ovl_iocb_to_rwf(ifl));
file_end_write(real.file);
/* Update size */
ovl_copyattr(ovl_inode_real(inode), inode);
@@ -370,6 +381,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
real.flags = 0;
aio_req->orig_iocb = iocb;
kiocb_clone(&aio_req->iocb, iocb, real.file);
+ aio_req->iocb.ki_flags = ifl;
aio_req->iocb.ki_complete = ovl_aio_rw_complete;
ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
if (ret != -EIOCBQUEUED)
@@ -433,6 +445,9 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
const struct cred *old_cred;
int ret;
+ if (!ovl_should_sync(OVL_FS(file_inode(file)->i_sb)))
+ return 0;
+
ret = ovl_real_fdget_meta(file, &real, !datasync);
if (ret)
return ret;
@@ -544,12 +559,28 @@ static long ovl_real_ioctl(struct file *file, unsigned int cmd,
return ret;
}
+static unsigned int ovl_iflags_to_fsflags(unsigned int iflags)
+{
+ unsigned int flags = 0;
+
+ if (iflags & S_SYNC)
+ flags |= FS_SYNC_FL;
+ if (iflags & S_APPEND)
+ flags |= FS_APPEND_FL;
+ if (iflags & S_IMMUTABLE)
+ flags |= FS_IMMUTABLE_FL;
+ if (iflags & S_NOATIME)
+ flags |= FS_NOATIME_FL;
+
+ return flags;
+}
+
static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
- unsigned long arg, unsigned int iflags)
+ unsigned long arg, unsigned int flags)
{
long ret;
struct inode *inode = file_inode(file);
- unsigned int old_iflags;
+ unsigned int oldflags;
if (!inode_owner_or_capable(inode))
return -EACCES;
@@ -561,10 +592,9 @@ static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
inode_lock(inode);
/* Check the capability before cred override */
- ret = -EPERM;
- old_iflags = READ_ONCE(inode->i_flags);
- if (((iflags ^ old_iflags) & (S_APPEND | S_IMMUTABLE)) &&
- !capable(CAP_LINUX_IMMUTABLE))
+ oldflags = ovl_iflags_to_fsflags(READ_ONCE(inode->i_flags));
+ ret = vfs_ioc_setflags_prepare(inode, oldflags, flags);
+ if (ret)
goto unlock;
ret = ovl_maybe_copy_up(file_dentry(file), O_WRONLY);
@@ -583,22 +613,6 @@ unlock:
}
-static unsigned int ovl_fsflags_to_iflags(unsigned int flags)
-{
- unsigned int iflags = 0;
-
- if (flags & FS_SYNC_FL)
- iflags |= S_SYNC;
- if (flags & FS_APPEND_FL)
- iflags |= S_APPEND;
- if (flags & FS_IMMUTABLE_FL)
- iflags |= S_IMMUTABLE;
- if (flags & FS_NOATIME_FL)
- iflags |= S_NOATIME;
-
- return iflags;
-}
-
static long ovl_ioctl_set_fsflags(struct file *file, unsigned int cmd,
unsigned long arg)
{
@@ -607,24 +621,23 @@ static long ovl_ioctl_set_fsflags(struct file *file, unsigned int cmd,
if (get_user(flags, (int __user *) arg))
return -EFAULT;
- return ovl_ioctl_set_flags(file, cmd, arg,
- ovl_fsflags_to_iflags(flags));
+ return ovl_ioctl_set_flags(file, cmd, arg, flags);
}
-static unsigned int ovl_fsxflags_to_iflags(unsigned int xflags)
+static unsigned int ovl_fsxflags_to_fsflags(unsigned int xflags)
{
- unsigned int iflags = 0;
+ unsigned int flags = 0;
if (xflags & FS_XFLAG_SYNC)
- iflags |= S_SYNC;
+ flags |= FS_SYNC_FL;
if (xflags & FS_XFLAG_APPEND)
- iflags |= S_APPEND;
+ flags |= FS_APPEND_FL;
if (xflags & FS_XFLAG_IMMUTABLE)
- iflags |= S_IMMUTABLE;
+ flags |= FS_IMMUTABLE_FL;
if (xflags & FS_XFLAG_NOATIME)
- iflags |= S_NOATIME;
+ flags |= FS_NOATIME_FL;
- return iflags;
+ return flags;
}
static long ovl_ioctl_set_fsxflags(struct file *file, unsigned int cmd,
@@ -637,10 +650,10 @@ static long ovl_ioctl_set_fsxflags(struct file *file, unsigned int cmd,
return -EFAULT;
return ovl_ioctl_set_flags(file, cmd, arg,
- ovl_fsxflags_to_iflags(fa.fsx_xflags));
+ ovl_fsxflags_to_fsflags(fa.fsx_xflags));
}
-static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
long ret;
@@ -665,8 +678,8 @@ static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return ret;
}
-static long ovl_compat_ioctl(struct file *file, unsigned int cmd,
- unsigned long arg)
+#ifdef CONFIG_COMPAT
+long ovl_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
switch (cmd) {
case FS_IOC32_GETFLAGS:
@@ -683,6 +696,7 @@ static long ovl_compat_ioctl(struct file *file, unsigned int cmd,
return ovl_ioctl(file, cmd, arg);
}
+#endif
enum ovl_copyop {
OVL_COPY,
@@ -784,7 +798,9 @@ const struct file_operations ovl_file_operations = {
.fallocate = ovl_fallocate,
.fadvise = ovl_fadvise,
.unlocked_ioctl = ovl_ioctl,
+#ifdef CONFIG_COMPAT
.compat_ioctl = ovl_compat_ioctl,
+#endif
.splice_read = ovl_splice_read,
.splice_write = ovl_splice_write,
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 8be6cd264f66..b584dca845ba 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -327,7 +327,7 @@ static const char *ovl_get_link(struct dentry *dentry,
return p;
}
-bool ovl_is_private_xattr(const char *name)
+bool ovl_is_private_xattr(struct super_block *sb, const char *name)
{
return strncmp(name, OVL_XATTR_PREFIX,
sizeof(OVL_XATTR_PREFIX) - 1) == 0;
@@ -391,15 +391,18 @@ int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
return res;
}
-static bool ovl_can_list(const char *s)
+static bool ovl_can_list(struct super_block *sb, const char *s)
{
+ /* Never list private (.overlay) */
+ if (ovl_is_private_xattr(sb, s))
+ return false;
+
/* List all non-trusted xatts */
if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
return true;
- /* Never list trusted.overlay, list other trusted for superuser only */
- return !ovl_is_private_xattr(s) &&
- ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
+ /* list other trusted for superuser only */
+ return ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
}
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
@@ -425,7 +428,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
return -EIO;
len -= slen;
- if (!ovl_can_list(s)) {
+ if (!ovl_can_list(dentry->d_sb, s)) {
res -= slen;
memmove(s, s + slen, len);
} else {
@@ -722,8 +725,8 @@ static int ovl_set_nlink_common(struct dentry *dentry,
if (WARN_ON(len >= sizeof(buf)))
return -EIO;
- return ovl_do_setxattr(ovl_dentry_upper(dentry),
- OVL_XATTR_NLINK, buf, len, 0);
+ return ovl_do_setxattr(OVL_FS(inode->i_sb), ovl_dentry_upper(dentry),
+ OVL_XATTR_NLINK, buf, len);
}
int ovl_set_nlink_upper(struct dentry *dentry)
@@ -736,7 +739,7 @@ int ovl_set_nlink_lower(struct dentry *dentry)
return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i");
}
-unsigned int ovl_get_nlink(struct dentry *lowerdentry,
+unsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry,
struct dentry *upperdentry,
unsigned int fallback)
{
@@ -748,7 +751,8 @@ unsigned int ovl_get_nlink(struct dentry *lowerdentry,
if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1)
return fallback;
- err = vfs_getxattr(upperdentry, OVL_XATTR_NLINK, &buf, sizeof(buf) - 1);
+ err = ovl_do_getxattr(ofs, upperdentry, OVL_XATTR_NLINK,
+ &buf, sizeof(buf) - 1);
if (err < 0)
goto fail;
@@ -946,6 +950,7 @@ static struct inode *ovl_iget5(struct super_block *sb, struct inode *newinode,
struct inode *ovl_get_inode(struct super_block *sb,
struct ovl_inode_params *oip)
{
+ struct ovl_fs *ofs = OVL_FS(sb);
struct dentry *upperdentry = oip->upperdentry;
struct ovl_path *lowerpath = oip->lowerpath;
struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
@@ -993,7 +998,8 @@ struct inode *ovl_get_inode(struct super_block *sb,
/* Recalculate nlink for non-dir due to indexing */
if (!is_dir)
- nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink);
+ nlink = ovl_get_nlink(ofs, lowerdentry, upperdentry,
+ nlink);
set_nlink(inode, nlink);
ino = key->i_ino;
} else {
@@ -1009,7 +1015,7 @@ struct inode *ovl_get_inode(struct super_block *sb,
ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev);
ovl_inode_init(inode, oip, ino, fsid);
- if (upperdentry && ovl_is_impuredir(upperdentry))
+ if (upperdentry && ovl_is_impuredir(sb, upperdentry))
ovl_set_flag(OVL_IMPURE, inode);
if (oip->index)
@@ -1023,7 +1029,7 @@ struct inode *ovl_get_inode(struct super_block *sb,
/* Check for non-merge dir that may have whiteouts */
if (is_dir) {
if (((upperdentry && lowerdentry) || oip->numlower > 1) ||
- ovl_check_origin_xattr(upperdentry ?: lowerdentry)) {
+ ovl_check_origin_xattr(ofs, upperdentry ?: lowerdentry)) {
ovl_set_flag(OVL_WHITEOUTS, inode);
}
}
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index f7d4358db637..a6162c4076db 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -30,8 +30,9 @@ static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
{
int res;
char *buf;
+ struct ovl_fs *ofs = OVL_FS(d->sb);
- buf = ovl_get_redirect_xattr(dentry, prelen + strlen(post));
+ buf = ovl_get_redirect_xattr(ofs, dentry, prelen + strlen(post));
if (IS_ERR_OR_NULL(buf))
return PTR_ERR(buf);
@@ -104,12 +105,13 @@ int ovl_check_fb_len(struct ovl_fb *fb, int fb_len)
return 0;
}
-static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name)
+static struct ovl_fh *ovl_get_fh(struct ovl_fs *ofs, struct dentry *dentry,
+ enum ovl_xattr ox)
{
int res, err;
struct ovl_fh *fh = NULL;
- res = vfs_getxattr(dentry, name, NULL, 0);
+ res = ovl_do_getxattr(ofs, dentry, ox, NULL, 0);
if (res < 0) {
if (res == -ENODATA || res == -EOPNOTSUPP)
return NULL;
@@ -123,7 +125,7 @@ static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name)
if (!fh)
return ERR_PTR(-ENOMEM);
- res = vfs_getxattr(dentry, name, fh->buf, res);
+ res = ovl_do_getxattr(ofs, dentry, ox, fh->buf, res);
if (res < 0)
goto fail;
@@ -186,9 +188,9 @@ struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
return real;
}
-static bool ovl_is_opaquedir(struct dentry *dentry)
+static bool ovl_is_opaquedir(struct super_block *sb, struct dentry *dentry)
{
- return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE);
+ return ovl_check_dir_xattr(sb, dentry, OVL_XATTR_OPAQUE);
}
static struct dentry *ovl_lookup_positive_unlocked(const char *name,
@@ -251,7 +253,7 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
d->stop = true;
goto put_and_out;
}
- err = ovl_check_metacopy_xattr(this);
+ err = ovl_check_metacopy_xattr(OVL_FS(d->sb), this);
if (err < 0)
goto out_err;
@@ -271,7 +273,7 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
if (d->last)
goto out;
- if (ovl_is_opaquedir(this)) {
+ if (ovl_is_opaquedir(d->sb, this)) {
d->stop = true;
if (last_element)
d->opaque = true;
@@ -391,7 +393,7 @@ invalid:
static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
struct ovl_path **stackp)
{
- struct ovl_fh *fh = ovl_get_fh(upperdentry, OVL_XATTR_ORIGIN);
+ struct ovl_fh *fh = ovl_get_fh(ofs, upperdentry, OVL_XATTR_ORIGIN);
int err;
if (IS_ERR_OR_NULL(fh))
@@ -413,10 +415,10 @@ static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
* Verify that @fh matches the file handle stored in xattr @name.
* Return 0 on match, -ESTALE on mismatch, < 0 on error.
*/
-static int ovl_verify_fh(struct dentry *dentry, const char *name,
- const struct ovl_fh *fh)
+static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry,
+ enum ovl_xattr ox, const struct ovl_fh *fh)
{
- struct ovl_fh *ofh = ovl_get_fh(dentry, name);
+ struct ovl_fh *ofh = ovl_get_fh(ofs, dentry, ox);
int err = 0;
if (!ofh)
@@ -440,8 +442,9 @@ static int ovl_verify_fh(struct dentry *dentry, const char *name,
*
* Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
*/
-int ovl_verify_set_fh(struct dentry *dentry, const char *name,
- struct dentry *real, bool is_upper, bool set)
+int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
+ enum ovl_xattr ox, struct dentry *real, bool is_upper,
+ bool set)
{
struct inode *inode;
struct ovl_fh *fh;
@@ -454,9 +457,9 @@ int ovl_verify_set_fh(struct dentry *dentry, const char *name,
goto fail;
}
- err = ovl_verify_fh(dentry, name, fh);
+ err = ovl_verify_fh(ofs, dentry, ox, fh);
if (set && err == -ENODATA)
- err = ovl_do_setxattr(dentry, name, fh->buf, fh->fb.len, 0);
+ err = ovl_do_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len);
if (err)
goto fail;
@@ -481,7 +484,7 @@ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
if (!d_is_dir(index))
return dget(index);
- fh = ovl_get_fh(index, OVL_XATTR_UPPER);
+ fh = ovl_get_fh(ofs, index, OVL_XATTR_UPPER);
if (IS_ERR_OR_NULL(fh))
return ERR_CAST(fh);
@@ -574,7 +577,7 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
goto fail;
}
- err = ovl_verify_fh(upper, OVL_XATTR_ORIGIN, fh);
+ err = ovl_verify_fh(ofs, upper, OVL_XATTR_ORIGIN, fh);
dput(upper);
if (err)
goto fail;
@@ -585,7 +588,7 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
if (err)
goto fail;
- if (ovl_get_nlink(origin.dentry, index, 0) == 0)
+ if (ovl_get_nlink(ofs, origin.dentry, index, 0) == 0)
goto orphan;
}
@@ -741,7 +744,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
}
/* Verify that dir index 'upper' xattr points to upper dir */
- err = ovl_verify_upper(index, upper, false);
+ err = ovl_verify_upper(ofs, index, upper, false);
if (err) {
if (err == -ESTALE) {
pr_warn_ratelimited("suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
@@ -790,12 +793,12 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
}
/* Fix missing 'origin' xattr */
-static int ovl_fix_origin(struct dentry *dentry, struct dentry *lower,
- struct dentry *upper)
+static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry,
+ struct dentry *lower, struct dentry *upper)
{
int err;
- if (ovl_check_origin_xattr(upper))
+ if (ovl_check_origin_xattr(ofs, upper))
return 0;
err = ovl_want_write(dentry);
@@ -920,7 +923,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
* of lower dir and set upper parent "impure".
*/
if (upperdentry && !ctr && !ofs->noxattr && d.is_dir) {
- err = ovl_fix_origin(dentry, this, upperdentry);
+ err = ovl_fix_origin(ofs, dentry, this, upperdentry);
if (err) {
dput(this);
goto out_put;
@@ -939,7 +942,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
if (upperdentry && !ctr &&
((d.is_dir && ovl_verify_lower(dentry->d_sb)) ||
(!d.is_dir && ofs->config.index && origin_path))) {
- err = ovl_verify_origin(upperdentry, this, false);
+ err = ovl_verify_origin(ofs, upperdentry, this, false);
if (err) {
dput(this);
if (d.is_dir)
@@ -1060,13 +1063,13 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
ovl_dentry_set_upper_alias(dentry);
else if (index) {
upperdentry = dget(index);
- upperredirect = ovl_get_redirect_xattr(upperdentry, 0);
+ upperredirect = ovl_get_redirect_xattr(ofs, upperdentry, 0);
if (IS_ERR(upperredirect)) {
err = PTR_ERR(upperredirect);
upperredirect = NULL;
goto out_free_oe;
}
- err = ovl_check_metacopy_xattr(upperdentry);
+ err = ovl_check_metacopy_xattr(ofs, upperdentry);
if (err < 0)
goto out_free_oe;
uppermetacopy = err;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 29bc1ec699e7..f8880aa2ba0e 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -23,13 +23,16 @@ enum ovl_path_type {
#define OVL_TYPE_ORIGIN(type) ((type) & __OVL_PATH_ORIGIN)
#define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay."
-#define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque"
-#define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect"
-#define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin"
-#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure"
-#define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink"
-#define OVL_XATTR_UPPER OVL_XATTR_PREFIX "upper"
-#define OVL_XATTR_METACOPY OVL_XATTR_PREFIX "metacopy"
+
+enum ovl_xattr {
+ OVL_XATTR_OPAQUE,
+ OVL_XATTR_REDIRECT,
+ OVL_XATTR_ORIGIN,
+ OVL_XATTR_IMPURE,
+ OVL_XATTR_NLINK,
+ OVL_XATTR_UPPER,
+ OVL_XATTR_METACOPY,
+};
enum ovl_inode_flag {
/* Pure upper dir that may contain non pure upper entries */
@@ -110,6 +113,12 @@ struct ovl_fh {
#define OVL_FH_FID_OFFSET (OVL_FH_WIRE_OFFSET + \
offsetof(struct ovl_fb, fid))
+extern const char *ovl_xattr_table[];
+static inline const char *ovl_xattr(struct ovl_fs *ofs, enum ovl_xattr ox)
+{
+ return ovl_xattr_table[ox];
+}
+
static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
{
int err = vfs_rmdir(dir, dentry);
@@ -170,17 +179,29 @@ static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry,
return err;
}
-static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags)
+static inline ssize_t ovl_do_getxattr(struct ovl_fs *ofs, struct dentry *dentry,
+ enum ovl_xattr ox, void *value,
+ size_t size)
+{
+ const char *name = ovl_xattr(ofs, ox);
+ return vfs_getxattr(dentry, name, value, size);
+}
+
+static inline int ovl_do_setxattr(struct ovl_fs *ofs, struct dentry *dentry,
+ enum ovl_xattr ox, const void *value,
+ size_t size)
{
- int err = vfs_setxattr(dentry, name, value, size, flags);
- pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i\n",
- dentry, name, min((int)size, 48), value, size, flags, err);
+ const char *name = ovl_xattr(ofs, ox);
+ int err = vfs_setxattr(dentry, name, value, size, 0);
+ pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0) = %i\n",
+ dentry, name, min((int)size, 48), value, size, err);
return err;
}
-static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
+static inline int ovl_do_removexattr(struct ovl_fs *ofs, struct dentry *dentry,
+ enum ovl_xattr ox)
{
+ const char *name = ovl_xattr(ofs, ox);
int err = vfs_removexattr(dentry, name);
pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, err);
return err;
@@ -280,10 +301,11 @@ struct file *ovl_path_open(struct path *path, int flags);
int ovl_copy_up_start(struct dentry *dentry, int flags);
void ovl_copy_up_end(struct dentry *dentry);
bool ovl_already_copied_up(struct dentry *dentry, int flags);
-bool ovl_check_origin_xattr(struct dentry *dentry);
-bool ovl_check_dir_xattr(struct dentry *dentry, const char *name);
+bool ovl_check_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry);
+bool ovl_check_dir_xattr(struct super_block *sb, struct dentry *dentry,
+ enum ovl_xattr ox);
int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
- const char *name, const void *value, size_t size,
+ enum ovl_xattr ox, const void *value, size_t size,
int xerr);
int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
void ovl_set_flag(unsigned long flag, struct inode *inode);
@@ -296,15 +318,15 @@ bool ovl_need_index(struct dentry *dentry);
int ovl_nlink_start(struct dentry *dentry);
void ovl_nlink_end(struct dentry *dentry);
int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir);
-int ovl_check_metacopy_xattr(struct dentry *dentry);
+int ovl_check_metacopy_xattr(struct ovl_fs *ofs, struct dentry *dentry);
bool ovl_is_metacopy_dentry(struct dentry *dentry);
-char *ovl_get_redirect_xattr(struct dentry *dentry, int padding);
-ssize_t ovl_getxattr(struct dentry *dentry, char *name, char **value,
- size_t padding);
+char *ovl_get_redirect_xattr(struct ovl_fs *ofs, struct dentry *dentry,
+ int padding);
-static inline bool ovl_is_impuredir(struct dentry *dentry)
+static inline bool ovl_is_impuredir(struct super_block *sb,
+ struct dentry *dentry)
{
- return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE);
+ return ovl_check_dir_xattr(sb, dentry, OVL_XATTR_IMPURE);
}
/*
@@ -365,8 +387,9 @@ struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
bool connected);
int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
struct dentry *upperdentry, struct ovl_path **stackp);
-int ovl_verify_set_fh(struct dentry *dentry, const char *name,
- struct dentry *real, bool is_upper, bool set);
+int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
+ enum ovl_xattr ox, struct dentry *real, bool is_upper,
+ bool set);
struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index);
int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index);
int ovl_get_index_name(struct dentry *origin, struct qstr *name);
@@ -378,20 +401,22 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags);
bool ovl_lower_positive(struct dentry *dentry);
-static inline int ovl_verify_origin(struct dentry *upper,
+static inline int ovl_verify_origin(struct ovl_fs *ofs, struct dentry *upper,
struct dentry *origin, bool set)
{
- return ovl_verify_set_fh(upper, OVL_XATTR_ORIGIN, origin, false, set);
+ return ovl_verify_set_fh(ofs, upper, OVL_XATTR_ORIGIN, origin,
+ false, set);
}
-static inline int ovl_verify_upper(struct dentry *index,
- struct dentry *upper, bool set)
+static inline int ovl_verify_upper(struct ovl_fs *ofs, struct dentry *index,
+ struct dentry *upper, bool set)
{
- return ovl_verify_set_fh(index, OVL_XATTR_UPPER, upper, true, set);
+ return ovl_verify_set_fh(ofs, index, OVL_XATTR_UPPER, upper, true, set);
}
/* readdir.c */
extern const struct file_operations ovl_dir_operations;
+struct file *ovl_dir_real_file(const struct file *file, bool want_upper);
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
void ovl_cache_free(struct list_head *list);
@@ -404,7 +429,7 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs);
/* inode.c */
int ovl_set_nlink_upper(struct dentry *dentry);
int ovl_set_nlink_lower(struct dentry *dentry);
-unsigned int ovl_get_nlink(struct dentry *lowerdentry,
+unsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry,
struct dentry *upperdentry,
unsigned int fallback);
int ovl_setattr(struct dentry *dentry, struct iattr *attr);
@@ -418,7 +443,7 @@ int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
struct posix_acl *ovl_get_acl(struct inode *inode, int type);
int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags);
-bool ovl_is_private_xattr(const char *name);
+bool ovl_is_private_xattr(struct super_block *sb, const char *name);
struct ovl_inode_params {
struct inode *newinode;
@@ -479,12 +504,15 @@ struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr);
extern const struct file_operations ovl_file_operations;
int __init ovl_aio_request_cache_init(void);
void ovl_aio_request_cache_destroy(void);
+long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+long ovl_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
/* copy_up.c */
int ovl_copy_up(struct dentry *dentry);
int ovl_copy_up_with_data(struct dentry *dentry);
int ovl_maybe_copy_up(struct dentry *dentry, int flags);
-int ovl_copy_xattr(struct dentry *old, struct dentry *new);
+int ovl_copy_xattr(struct super_block *sb, struct dentry *old,
+ struct dentry *new);
int ovl_set_attr(struct dentry *upper, struct kstat *stat);
struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper);
int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index b429c80879ee..1b5a2094df8e 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -17,6 +17,7 @@ struct ovl_config {
bool nfs_export;
int xino;
bool metacopy;
+ bool ovl_volatile;
};
struct ovl_sb {
@@ -90,6 +91,11 @@ static inline struct ovl_fs *OVL_FS(struct super_block *sb)
return (struct ovl_fs *)sb->s_fs_info;
}
+static inline bool ovl_should_sync(struct ovl_fs *ofs)
+{
+ return !ofs->config.ovl_volatile;
+}
+
/* private information held for every overlayfs dentry */
struct ovl_entry {
union {
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 6918b98faeb6..01620ebae1bd 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -606,6 +606,7 @@ static struct ovl_dir_cache *ovl_cache_get_impure(struct path *path)
{
int res;
struct dentry *dentry = path->dentry;
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct ovl_dir_cache *cache;
cache = ovl_dir_cache(d_inode(dentry));
@@ -632,7 +633,7 @@ static struct ovl_dir_cache *ovl_cache_get_impure(struct path *path)
* Removing the "impure" xattr is best effort.
*/
if (!ovl_want_write(dentry)) {
- ovl_do_removexattr(ovl_dentry_upper(dentry),
+ ovl_do_removexattr(ofs, ovl_dentry_upper(dentry),
OVL_XATTR_IMPURE);
ovl_drop_write(dentry);
}
@@ -839,7 +840,7 @@ out_unlock:
return res;
}
-static struct file *ovl_dir_open_realfile(struct file *file,
+static struct file *ovl_dir_open_realfile(const struct file *file,
struct path *realpath)
{
struct file *res;
@@ -852,16 +853,22 @@ static struct file *ovl_dir_open_realfile(struct file *file,
return res;
}
-static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
- int datasync)
+/*
+ * Like ovl_real_fdget(), returns upperfile if dir was copied up since open.
+ * Unlike ovl_real_fdget(), this caches upperfile in file->private_data.
+ *
+ * TODO: use same abstract type for file->private_data of dir and file so
+ * upperfile could also be cached for files as well.
+ */
+struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
{
+
struct ovl_dir_file *od = file->private_data;
struct dentry *dentry = file->f_path.dentry;
struct file *realfile = od->realfile;
- /* Nothing to sync for lower */
if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
- return 0;
+ return want_upper ? NULL : realfile;
/*
* Need to check if we started out being a lower dir, but got copied up
@@ -880,7 +887,7 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
if (!od->upperfile) {
if (IS_ERR(realfile)) {
inode_unlock(inode);
- return PTR_ERR(realfile);
+ return realfile;
}
smp_store_release(&od->upperfile, realfile);
} else {
@@ -893,6 +900,25 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
}
}
+ return realfile;
+}
+
+static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync)
+{
+ struct file *realfile;
+ int err;
+
+ if (!ovl_should_sync(OVL_FS(file->f_path.dentry->d_sb)))
+ return 0;
+
+ realfile = ovl_dir_real_file(file, true);
+ err = PTR_ERR_OR_ZERO(realfile);
+
+ /* Nothing to sync for lower */
+ if (!realfile || err)
+ return err;
+
return vfs_fsync_range(realfile, start, end, datasync);
}
@@ -945,6 +971,10 @@ const struct file_operations ovl_dir_operations = {
.llseek = ovl_dir_llseek,
.fsync = ovl_dir_fsync,
.release = ovl_dir_release,
+ .unlocked_ioctl = ovl_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = ovl_compat_ioctl,
+#endif
};
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
@@ -1051,7 +1081,9 @@ int ovl_check_d_type_supported(struct path *realpath)
return rdd.d_type_supported;
}
-static void ovl_workdir_cleanup_recurse(struct path *path, int level)
+#define OVL_INCOMPATDIR_NAME "incompat"
+
+static int ovl_workdir_cleanup_recurse(struct path *path, int level)
{
int err;
struct inode *dir = path->dentry->d_inode;
@@ -1065,6 +1097,19 @@ static void ovl_workdir_cleanup_recurse(struct path *path, int level)
.root = &root,
.is_lowest = false,
};
+ bool incompat = false;
+
+ /*
+ * The "work/incompat" directory is treated specially - if it is not
+ * empty, instead of printing a generic error and mounting read-only,
+ * we will error about incompat features and fail the mount.
+ *
+ * When called from ovl_indexdir_cleanup(), path->dentry->d_name.name
+ * starts with '#'.
+ */
+ if (level == 2 &&
+ !strcmp(path->dentry->d_name.name, OVL_INCOMPATDIR_NAME))
+ incompat = true;
err = ovl_dir_read(path, &rdd);
if (err)
@@ -1079,17 +1124,25 @@ static void ovl_workdir_cleanup_recurse(struct path *path, int level)
continue;
if (p->len == 2 && p->name[1] == '.')
continue;
+ } else if (incompat) {
+ pr_err("overlay with incompat feature '%s' cannot be mounted\n",
+ p->name);
+ err = -EINVAL;
+ break;
}
dentry = lookup_one_len(p->name, path->dentry, p->len);
if (IS_ERR(dentry))
continue;
if (dentry->d_inode)
- ovl_workdir_cleanup(dir, path->mnt, dentry, level);
+ err = ovl_workdir_cleanup(dir, path->mnt, dentry, level);
dput(dentry);
+ if (err)
+ break;
}
inode_unlock(dir);
out:
ovl_cache_free(&list);
+ return err;
}
int ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
@@ -1106,9 +1159,10 @@ int ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
struct path path = { .mnt = mnt, .dentry = dentry };
inode_unlock(dir);
- ovl_workdir_cleanup_recurse(&path, level + 1);
+ err = ovl_workdir_cleanup_recurse(&path, level + 1);
inode_lock_nested(dir, I_MUTEX_PARENT);
- err = ovl_cleanup(dir, dentry);
+ if (!err)
+ err = ovl_cleanup(dir, dentry);
}
return err;
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 4b38141c2985..290983bcfbb3 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -264,6 +264,8 @@ static int ovl_sync_fs(struct super_block *sb, int wait)
if (!ovl_upper_mnt(ofs))
return 0;
+ if (!ovl_should_sync(ofs))
+ return 0;
/*
* Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC).
* All the super blocks will be iterated, including upper_sb.
@@ -362,6 +364,8 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
if (ofs->config.metacopy != ovl_metacopy_def)
seq_printf(m, ",metacopy=%s",
ofs->config.metacopy ? "on" : "off");
+ if (ofs->config.ovl_volatile)
+ seq_puts(m, ",volatile");
return 0;
}
@@ -376,9 +380,11 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data)
if (*flags & SB_RDONLY && !sb_rdonly(sb)) {
upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
- down_read(&upper_sb->s_umount);
- ret = sync_filesystem(upper_sb);
- up_read(&upper_sb->s_umount);
+ if (ovl_should_sync(ofs)) {
+ down_read(&upper_sb->s_umount);
+ ret = sync_filesystem(upper_sb);
+ up_read(&upper_sb->s_umount);
+ }
}
return ret;
@@ -411,6 +417,7 @@ enum {
OPT_XINO_AUTO,
OPT_METACOPY_ON,
OPT_METACOPY_OFF,
+ OPT_VOLATILE,
OPT_ERR,
};
@@ -429,6 +436,7 @@ static const match_table_t ovl_tokens = {
{OPT_XINO_AUTO, "xino=auto"},
{OPT_METACOPY_ON, "metacopy=on"},
{OPT_METACOPY_OFF, "metacopy=off"},
+ {OPT_VOLATILE, "volatile"},
{OPT_ERR, NULL}
};
@@ -573,6 +581,10 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
metacopy_opt = true;
break;
+ case OPT_VOLATILE:
+ config->ovl_volatile = true;
+ break;
+
default:
pr_err("unrecognized mount option \"%s\" or missing value\n",
p);
@@ -595,6 +607,11 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
config->index = false;
}
+ if (!config->upperdir && config->ovl_volatile) {
+ pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n");
+ config->ovl_volatile = false;
+ }
+
err = ovl_parse_redirect_mode(config, config->redirect_mode);
if (err)
return err;
@@ -705,8 +722,12 @@ retry:
goto out_unlock;
retried = true;
- ovl_workdir_cleanup(dir, mnt, work, 0);
+ err = ovl_workdir_cleanup(dir, mnt, work, 0);
dput(work);
+ if (err == -EINVAL) {
+ work = ERR_PTR(err);
+ goto out_unlock;
+ }
goto retry;
}
@@ -1199,11 +1220,50 @@ out_unlock:
return err;
}
+static struct dentry *ovl_lookup_or_create(struct dentry *parent,
+ const char *name, umode_t mode)
+{
+ size_t len = strlen(name);
+ struct dentry *child;
+
+ inode_lock_nested(parent->d_inode, I_MUTEX_PARENT);
+ child = lookup_one_len(name, parent, len);
+ if (!IS_ERR(child) && !child->d_inode)
+ child = ovl_create_real(parent->d_inode, child,
+ OVL_CATTR(mode));
+ inode_unlock(parent->d_inode);
+ dput(parent);
+
+ return child;
+}
+
+/*
+ * Creates $workdir/work/incompat/volatile/dirty file if it is not already
+ * present.
+ */
+static int ovl_create_volatile_dirty(struct ovl_fs *ofs)
+{
+ unsigned int ctr;
+ struct dentry *d = dget(ofs->workbasedir);
+ static const char *const volatile_path[] = {
+ OVL_WORKDIR_NAME, "incompat", "volatile", "dirty"
+ };
+ const char *const *name = volatile_path;
+
+ for (ctr = ARRAY_SIZE(volatile_path); ctr; ctr--, name++) {
+ d = ovl_lookup_or_create(d, *name, ctr > 1 ? S_IFDIR : S_IFREG);
+ if (IS_ERR(d))
+ return PTR_ERR(d);
+ }
+ dput(d);
+ return 0;
+}
+
static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
struct path *workpath)
{
struct vfsmount *mnt = ovl_upper_mnt(ofs);
- struct dentry *temp;
+ struct dentry *temp, *workdir;
bool rename_whiteout;
bool d_type;
int fh_type;
@@ -1213,10 +1273,13 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
if (err)
return err;
- ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
- if (!ofs->workdir)
+ workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
+ err = PTR_ERR(workdir);
+ if (IS_ERR_OR_NULL(workdir))
goto out;
+ ofs->workdir = workdir;
+
err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir");
if (err)
goto out;
@@ -1256,7 +1319,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
/*
* Check if upper/work fs supports trusted.overlay.* xattr
*/
- err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0);
+ err = ovl_do_setxattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE, "0", 1);
if (err) {
ofs->noxattr = true;
ofs->config.index = false;
@@ -1264,7 +1327,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
pr_warn("upper fs does not support xattr, falling back to index=off and metacopy=off.\n");
err = 0;
} else {
- vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
+ ovl_do_removexattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE);
}
/*
@@ -1279,6 +1342,18 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
goto out;
}
+ /*
+ * For volatile mount, create a incompat/volatile/dirty file to keep
+ * track of it.
+ */
+ if (ofs->config.ovl_volatile) {
+ err = ovl_create_volatile_dirty(ofs);
+ if (err < 0) {
+ pr_err("Failed to create volatile/dirty file.\n");
+ goto out;
+ }
+ }
+
/* Check if upper/work fs supports file handles */
fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
if (ofs->config.index && !fh_type) {
@@ -1347,6 +1422,7 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
struct ovl_entry *oe, struct path *upperpath)
{
struct vfsmount *mnt = ovl_upper_mnt(ofs);
+ struct dentry *indexdir;
int err;
err = mnt_want_write(mnt);
@@ -1354,8 +1430,8 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
return err;
/* Verify lower root is upper root origin */
- err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry,
- true);
+ err = ovl_verify_origin(ofs, upperpath->dentry,
+ oe->lowerstack[0].dentry, true);
if (err) {
pr_err("failed to verify upper root origin\n");
goto out;
@@ -1366,9 +1442,12 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
ofs->workdir_trap = NULL;
dput(ofs->workdir);
ofs->workdir = NULL;
- ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
- if (ofs->indexdir) {
- ofs->workdir = dget(ofs->indexdir);
+ indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
+ if (IS_ERR(indexdir)) {
+ err = PTR_ERR(indexdir);
+ } else if (indexdir) {
+ ofs->indexdir = indexdir;
+ ofs->workdir = dget(indexdir);
err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap,
"indexdir");
@@ -1383,13 +1462,15 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
* "trusted.overlay.upper" to indicate that index may have
* directory entries.
*/
- if (ovl_check_origin_xattr(ofs->indexdir)) {
- err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN,
+ if (ovl_check_origin_xattr(ofs, ofs->indexdir)) {
+ err = ovl_verify_set_fh(ofs, ofs->indexdir,
+ OVL_XATTR_ORIGIN,
upperpath->dentry, true, false);
if (err)
pr_err("failed to verify index dir 'origin' xattr\n");
}
- err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true);
+ err = ovl_verify_upper(ofs, ofs->indexdir, upperpath->dentry,
+ true);
if (err)
pr_err("failed to verify index dir 'upper' xattr\n");
@@ -1755,7 +1836,7 @@ static struct dentry *ovl_get_root(struct super_block *sb,
ino = d_inode(upperdentry)->i_ino;
fsid = 0;
ovl_dentry_set_upper_alias(root);
- if (ovl_is_impuredir(upperdentry))
+ if (ovl_is_impuredir(sb, upperdentry))
ovl_set_flag(OVL_IMPURE, d_inode(root));
}
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 56c1f89f20c9..23f475627d07 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -544,11 +544,11 @@ void ovl_copy_up_end(struct dentry *dentry)
ovl_inode_unlock(d_inode(dentry));
}
-bool ovl_check_origin_xattr(struct dentry *dentry)
+bool ovl_check_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry)
{
int res;
- res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0);
+ res = ovl_do_getxattr(ofs, dentry, OVL_XATTR_ORIGIN, NULL, 0);
/* Zero size value means "copied up but origin unknown" */
if (res >= 0)
@@ -557,7 +557,8 @@ bool ovl_check_origin_xattr(struct dentry *dentry)
return false;
}
-bool ovl_check_dir_xattr(struct dentry *dentry, const char *name)
+bool ovl_check_dir_xattr(struct super_block *sb, struct dentry *dentry,
+ enum ovl_xattr ox)
{
int res;
char val;
@@ -565,15 +566,36 @@ bool ovl_check_dir_xattr(struct dentry *dentry, const char *name)
if (!d_is_dir(dentry))
return false;
- res = vfs_getxattr(dentry, name, &val, 1);
+ res = ovl_do_getxattr(OVL_FS(sb), dentry, ox, &val, 1);
if (res == 1 && val == 'y')
return true;
return false;
}
+#define OVL_XATTR_OPAQUE_POSTFIX "opaque"
+#define OVL_XATTR_REDIRECT_POSTFIX "redirect"
+#define OVL_XATTR_ORIGIN_POSTFIX "origin"
+#define OVL_XATTR_IMPURE_POSTFIX "impure"
+#define OVL_XATTR_NLINK_POSTFIX "nlink"
+#define OVL_XATTR_UPPER_POSTFIX "upper"
+#define OVL_XATTR_METACOPY_POSTFIX "metacopy"
+
+#define OVL_XATTR_TAB_ENTRY(x) \
+ [x] = OVL_XATTR_PREFIX x ## _POSTFIX
+
+const char *ovl_xattr_table[] = {
+ OVL_XATTR_TAB_ENTRY(OVL_XATTR_OPAQUE),
+ OVL_XATTR_TAB_ENTRY(OVL_XATTR_REDIRECT),
+ OVL_XATTR_TAB_ENTRY(OVL_XATTR_ORIGIN),
+ OVL_XATTR_TAB_ENTRY(OVL_XATTR_IMPURE),
+ OVL_XATTR_TAB_ENTRY(OVL_XATTR_NLINK),
+ OVL_XATTR_TAB_ENTRY(OVL_XATTR_UPPER),
+ OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY),
+};
+
int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
- const char *name, const void *value, size_t size,
+ enum ovl_xattr ox, const void *value, size_t size,
int xerr)
{
int err;
@@ -582,10 +604,10 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
if (ofs->noxattr)
return xerr;
- err = ovl_do_setxattr(upperdentry, name, value, size, 0);
+ err = ovl_do_setxattr(ofs, upperdentry, ox, value, size);
if (err == -EOPNOTSUPP) {
- pr_warn("cannot set %s xattr on upper\n", name);
+ pr_warn("cannot set %s xattr on upper\n", ovl_xattr(ofs, ox));
ofs->noxattr = true;
return xerr;
}
@@ -845,7 +867,7 @@ err:
}
/* err < 0, 0 if no metacopy xattr, 1 if metacopy xattr found */
-int ovl_check_metacopy_xattr(struct dentry *dentry)
+int ovl_check_metacopy_xattr(struct ovl_fs *ofs, struct dentry *dentry)
{
int res;
@@ -853,7 +875,7 @@ int ovl_check_metacopy_xattr(struct dentry *dentry)
if (!S_ISREG(d_inode(dentry)->i_mode))
return 0;
- res = vfs_getxattr(dentry, OVL_XATTR_METACOPY, NULL, 0);
+ res = ovl_do_getxattr(ofs, dentry, OVL_XATTR_METACOPY, NULL, 0);
if (res < 0) {
if (res == -ENODATA || res == -EOPNOTSUPP)
return 0;
@@ -882,49 +904,27 @@ bool ovl_is_metacopy_dentry(struct dentry *dentry)
return (oe->numlower > 1);
}
-ssize_t ovl_getxattr(struct dentry *dentry, char *name, char **value,
- size_t padding)
-{
- ssize_t res;
- char *buf = NULL;
-
- res = vfs_getxattr(dentry, name, NULL, 0);
- if (res < 0) {
- if (res == -ENODATA || res == -EOPNOTSUPP)
- return -ENODATA;
- goto fail;
- }
-
- if (res != 0) {
- buf = kzalloc(res + padding, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- res = vfs_getxattr(dentry, name, buf, res);
- if (res < 0)
- goto fail;
- }
- *value = buf;
-
- return res;
-
-fail:
- pr_warn_ratelimited("failed to get xattr %s: err=%zi)\n",
- name, res);
- kfree(buf);
- return res;
-}
-
-char *ovl_get_redirect_xattr(struct dentry *dentry, int padding)
+char *ovl_get_redirect_xattr(struct ovl_fs *ofs, struct dentry *dentry,
+ int padding)
{
int res;
char *s, *next, *buf = NULL;
- res = ovl_getxattr(dentry, OVL_XATTR_REDIRECT, &buf, padding + 1);
- if (res == -ENODATA)
+ res = ovl_do_getxattr(ofs, dentry, OVL_XATTR_REDIRECT, NULL, 0);
+ if (res == -ENODATA || res == -EOPNOTSUPP)
return NULL;
if (res < 0)
- return ERR_PTR(res);
+ goto fail;
+ if (res == 0)
+ goto invalid;
+
+ buf = kzalloc(res + padding + 1, GFP_KERNEL);
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+
+ res = ovl_do_getxattr(ofs, dentry, OVL_XATTR_REDIRECT, buf, res);
+ if (res < 0)
+ goto fail;
if (res == 0)
goto invalid;
@@ -943,6 +943,10 @@ char *ovl_get_redirect_xattr(struct dentry *dentry, int padding)
invalid:
pr_warn_ratelimited("invalid redirect (%s)\n", buf);
res = -EINVAL;
+ goto err_free;
+fail:
+ pr_warn_ratelimited("failed to get redirect (%i)\n", res);
+err_free:
kfree(buf);
return ERR_PTR(res);
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index aa69c35d904c..0f707003dda5 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1268,6 +1268,10 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
kuid_t kloginuid;
int rv;
+ /* Don't let kthreads write their own loginuid */
+ if (current->flags & PF_KTHREAD)
+ return -EPERM;
+
rcu_read_lock();
if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
rcu_read_unlock();
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 846d43df3fdf..217aa2705d5d 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1244,24 +1244,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
count = -EINTR;
goto out_mm;
}
- /*
- * Avoid to modify vma->vm_flags
- * without locked ops while the
- * coredump reads the vm_flags.
- */
- if (!mmget_still_valid(mm)) {
- /*
- * Silently return "count"
- * like if get_task_mm()
- * failed. FIXME: should this
- * function have returned
- * -ESRCH if get_task_mm()
- * failed like if
- * get_proc_task() fails?
- */
- mmap_write_unlock(mm);
- goto out_mm;
- }
for (vma = mm->mmap; vma; vma = vma->vm_next) {
vma->vm_flags &= ~VM_SOFTDIRTY;
vma_set_page_prot(vma);
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 414695454956..355523f4a4bf 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -224,7 +224,7 @@ static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
if (!pages)
goto out_free;
- nr = find_get_pages(inode->i_mapping, &pgoff, lpages, pages);
+ nr = find_get_pages_contig(inode->i_mapping, pgoff, lpages, pages);
if (nr != lpages)
goto out_free_pages; /* leave if some pages were missing */
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index e582d001f792..b1b7d3f5752f 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -356,6 +356,7 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos)
}
i->i_mode = mode;
+ i->i_blocks = (i->i_size + 511) >> 9;
unlock_new_inode(i);
return i;
diff --git a/fs/ubifs/auth.c b/fs/ubifs/auth.c
index cc5c0abfd536..b93b3cd10bfd 100644
--- a/fs/ubifs/auth.c
+++ b/fs/ubifs/auth.c
@@ -54,7 +54,7 @@ static int ubifs_hash_calc_hmac(const struct ubifs_info *c, const u8 *hash,
* ubifs_prepare_auth_node - Prepare an authentication node
* @c: UBIFS file-system description object
* @node: the node to calculate a hash for
- * @hash: input hash of previous nodes
+ * @inhash: input hash of previous nodes
*
* This function prepares an authentication node for writing onto flash.
* It creates a HMAC from the given input hash and writes it to the node.
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 31288d8fa2ce..ebff43f8009c 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -1123,6 +1123,7 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir)
err = PTR_ERR(dent);
if (err == -ENOENT)
break;
+ kfree(pdent);
return err;
}
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 62cb3db44e6e..a4aaeea63893 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -57,10 +57,6 @@
/**
* switch_gc_head - switch the garbage collection journal head.
* @c: UBIFS file-system description object
- * @buf: buffer to write
- * @len: length of the buffer to write
- * @lnum: LEB number written is returned here
- * @offs: offset written is returned here
*
* This function switch the GC head to the next LEB which is reserved in
* @c->gc_lnum. Returns %0 in case of success, %-EAGAIN if commit is required,
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 3df9be2c684c..4363d85a3fd4 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -134,7 +134,6 @@ static int setflags(struct inode *inode, int flags)
return err;
out_unlock:
- ubifs_err(c, "can't modify inode %lu attributes", inode->i_ino);
mutex_unlock(&ui->ui_mutex);
ubifs_release_budget(c, &req);
return err;
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 4a5b06f8d812..091c2ad8f211 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -894,6 +894,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
if (err == -ENOENT)
break;
+ kfree(pxent);
goto out_release;
}
@@ -906,6 +907,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
ubifs_err(c, "dead directory entry '%s', error %d",
xent->name, err);
ubifs_ro_mode(c, err);
+ kfree(pxent);
kfree(xent);
goto out_release;
}
@@ -936,8 +938,6 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
inode->i_ino);
release_head(c, BASEHD);
- ubifs_add_auth_dirt(c, lnum);
-
if (last_reference) {
err = ubifs_tnc_remove_ino(c, inode->i_ino);
if (err)
@@ -947,6 +947,8 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
} else {
union ubifs_key key;
+ ubifs_add_auth_dirt(c, lnum);
+
ino_key_init(c, &key, inode->i_ino);
err = ubifs_tnc_add(c, &key, lnum, offs, ilen, hash);
}
@@ -1798,7 +1800,6 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
u8 hash[UBIFS_HASH_ARR_SZ];
dbg_jnl("ino %lu, ino %lu", host->i_ino, inode->i_ino);
- ubifs_assert(c, host->i_nlink > 0);
ubifs_assert(c, inode->i_nlink > 0);
ubifs_assert(c, mutex_is_locked(&host_ui->ui_mutex));
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 2c294085ffed..0fb61956146d 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -173,6 +173,7 @@ int ubifs_add_orphan(struct ubifs_info *c, ino_t inum)
err = PTR_ERR(xent);
if (err == -ENOENT)
break;
+ kfree(pxent);
return err;
}
@@ -182,6 +183,7 @@ int ubifs_add_orphan(struct ubifs_info *c, ino_t inum)
xattr_orphan = orphan_add(c, xattr_inum, orphan);
if (IS_ERR(xattr_orphan)) {
+ kfree(pxent);
kfree(xent);
return PTR_ERR(xattr_orphan);
}
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index b69ffac7e415..2f8d8f4f411a 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -931,8 +931,6 @@ out:
* validate_ref - validate a reference node.
* @c: UBIFS file-system description object
* @ref: the reference node to validate
- * @ref_lnum: LEB number of the reference node
- * @ref_offs: reference node offset
*
* This function returns %1 if a bud reference already exists for the LEB. %0 is
* returned if the reference node is new, otherwise %-EINVAL is returned if
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index fbddb2a1c03f..cb3acfb7dd1f 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1110,14 +1110,20 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
break;
}
case Opt_auth_key:
- c->auth_key_name = kstrdup(args[0].from, GFP_KERNEL);
- if (!c->auth_key_name)
- return -ENOMEM;
+ if (!is_remount) {
+ c->auth_key_name = kstrdup(args[0].from,
+ GFP_KERNEL);
+ if (!c->auth_key_name)
+ return -ENOMEM;
+ }
break;
case Opt_auth_hash_name:
- c->auth_hash_name = kstrdup(args[0].from, GFP_KERNEL);
- if (!c->auth_hash_name)
- return -ENOMEM;
+ if (!is_remount) {
+ c->auth_hash_name = kstrdup(args[0].from,
+ GFP_KERNEL);
+ if (!c->auth_hash_name)
+ return -ENOMEM;
+ }
break;
case Opt_ignore:
break;
@@ -1141,6 +1147,18 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
return 0;
}
+/*
+ * ubifs_release_options - release mount parameters which have been dumped.
+ * @c: UBIFS file-system description object
+ */
+static void ubifs_release_options(struct ubifs_info *c)
+{
+ kfree(c->auth_key_name);
+ c->auth_key_name = NULL;
+ kfree(c->auth_hash_name);
+ c->auth_hash_name = NULL;
+}
+
/**
* destroy_journal - destroy journal data structures.
* @c: UBIFS file-system description object
@@ -1313,7 +1331,7 @@ static int mount_ubifs(struct ubifs_info *c)
err = ubifs_read_superblock(c);
if (err)
- goto out_free;
+ goto out_auth;
c->probing = 0;
@@ -1325,18 +1343,18 @@ static int mount_ubifs(struct ubifs_info *c)
ubifs_err(c, "'compressor \"%s\" is not compiled in",
ubifs_compr_name(c, c->default_compr));
err = -ENOTSUPP;
- goto out_free;
+ goto out_auth;
}
err = init_constants_sb(c);
if (err)
- goto out_free;
+ goto out_auth;
sz = ALIGN(c->max_idx_node_sz, c->min_io_size) * 2;
c->cbuf = kmalloc(sz, GFP_NOFS);
if (!c->cbuf) {
err = -ENOMEM;
- goto out_free;
+ goto out_auth;
}
err = alloc_wbufs(c);
@@ -1611,6 +1629,8 @@ out_wbufs:
free_wbufs(c);
out_cbuf:
kfree(c->cbuf);
+out_auth:
+ ubifs_exit_authentication(c);
out_free:
kfree(c->write_reserve_buf);
kfree(c->bu.buf);
@@ -1650,8 +1670,7 @@ static void ubifs_umount(struct ubifs_info *c)
ubifs_lpt_free(c, 0);
ubifs_exit_authentication(c);
- kfree(c->auth_key_name);
- kfree(c->auth_hash_name);
+ ubifs_release_options(c);
kfree(c->cbuf);
kfree(c->rcvrd_mst_node);
kfree(c->mst_node);
@@ -2221,6 +2240,7 @@ out_umount:
out_unlock:
mutex_unlock(&c->umount_mutex);
out_close:
+ ubifs_release_options(c);
ubi_close_volume(c->ubi);
out:
return err;
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index f609f6cdde70..894f1ab14616 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -360,7 +360,6 @@ static int lnc_add_directly(struct ubifs_info *c, struct ubifs_zbranch *zbr,
/**
* lnc_free - remove a leaf node from the leaf node cache.
* @zbr: zbranch of leaf node
- * @node: leaf node
*/
static void lnc_free(struct ubifs_zbranch *zbr)
{
@@ -2885,6 +2884,7 @@ int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum)
err = PTR_ERR(xent);
if (err == -ENOENT)
break;
+ kfree(pxent);
return err;
}
@@ -2898,6 +2898,7 @@ int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum)
fname_len(&nm) = le16_to_cpu(xent->nlen);
err = ubifs_tnc_remove_nm(c, &key1, &nm);
if (err) {
+ kfree(pxent);
kfree(xent);
return err;
}
@@ -2906,6 +2907,7 @@ int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum)
highest_ino_key(c, &key2, xattr_inum);
err = ubifs_tnc_remove_range(c, &key1, &key2);
if (err) {
+ kfree(pxent);
kfree(xent);
return err;
}
@@ -3466,7 +3468,7 @@ out_unlock:
/**
* dbg_check_inode_size - check if inode size is correct.
* @c: UBIFS file-system description object
- * @inum: inode number
+ * @inode: inode to check
* @size: inode size
*
* This function makes sure that the inode size (@size) is correct and it does
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 9aefbb60074f..a0b9b349efe6 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -522,6 +522,7 @@ int ubifs_purge_xattrs(struct inode *host)
xent->name, err);
ubifs_ro_mode(c, err);
kfree(pxent);
+ kfree(xent);
return err;
}
@@ -531,6 +532,7 @@ int ubifs_purge_xattrs(struct inode *host)
err = remove_xattr(c, host, xino, &nm);
if (err) {
kfree(pxent);
+ kfree(xent);
iput(xino);
ubifs_err(c, "cannot remove xattr, error %d", err);
return err;
diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c
index 2a878b739115..dc25823bfed9 100644
--- a/fs/unicode/utf8-core.c
+++ b/fs/unicode/utf8-core.c
@@ -6,6 +6,7 @@
#include <linux/parser.h>
#include <linux/errno.h>
#include <linux/unicode.h>
+#include <linux/stringhash.h>
#include "utf8n.h"
@@ -122,9 +123,29 @@ int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
}
return -EINVAL;
}
-
EXPORT_SYMBOL(utf8_casefold);
+int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
+ struct qstr *str)
+{
+ const struct utf8data *data = utf8nfdicf(um->version);
+ struct utf8cursor cur;
+ int c;
+ unsigned long hash = init_name_hash(salt);
+
+ if (utf8ncursor(&cur, data, str->name, str->len) < 0)
+ return -EINVAL;
+
+ while ((c = utf8byte(&cur))) {
+ if (c < 0)
+ return -EINVAL;
+ hash = partial_name_hash((unsigned char)c, hash);
+ }
+ str->hash = end_name_hash(hash);
+ return 0;
+}
+EXPORT_SYMBOL(utf8_casefold_hash);
+
int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
unsigned char *dest, size_t dlen)
{
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 0e4a3837da52..000b457ad087 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -601,8 +601,6 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
/* the various vma->vm_userfaultfd_ctx still points to it */
mmap_write_lock(mm);
- /* no task can run (and in turn coredump) yet */
- VM_WARN_ON(!mmget_still_valid(mm));
for (vma = mm->mmap; vma; vma = vma->vm_next)
if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
@@ -842,7 +840,6 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
/* len == 0 means wake all */
struct userfaultfd_wake_range range = { .len = 0, };
unsigned long new_flags;
- bool still_valid;
WRITE_ONCE(ctx->released, true);
@@ -858,7 +855,6 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
* taking the mmap_lock for writing.
*/
mmap_write_lock(mm);
- still_valid = mmget_still_valid(mm);
prev = NULL;
for (vma = mm->mmap; vma; vma = vma->vm_next) {
cond_resched();
@@ -869,17 +865,15 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
continue;
}
new_flags = vma->vm_flags & ~(VM_UFFD_MISSING | VM_UFFD_WP);
- if (still_valid) {
- prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end,
- new_flags, vma->anon_vma,
- vma->vm_file, vma->vm_pgoff,
- vma_policy(vma),
- NULL_VM_UFFD_CTX);
- if (prev)
- vma = prev;
- else
- prev = vma;
- }
+ prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end,
+ new_flags, vma->anon_vma,
+ vma->vm_file, vma->vm_pgoff,
+ vma_policy(vma),
+ NULL_VM_UFFD_CTX);
+ if (prev)
+ vma = prev;
+ else
+ prev = vma;
vma->vm_flags = new_flags;
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
}
@@ -1309,8 +1303,6 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
goto out;
mmap_write_lock(mm);
- if (!mmget_still_valid(mm))
- goto out_unlock;
vma = find_vma_prev(mm, start, &prev);
if (!vma)
goto out_unlock;
@@ -1511,8 +1503,6 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
goto out;
mmap_write_lock(mm);
- if (!mmget_still_valid(mm))
- goto out_unlock;
vma = find_vma_prev(mm, start, &prev);
if (!vma)
goto out_unlock;
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index e685299eb3d2..9fac5ea8d0e4 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -22,6 +22,31 @@ config XFS_FS
system of your root partition is compiled as a module, you'll need
to use an initial ramdisk (initrd) to boot.
+config XFS_SUPPORT_V4
+ bool "Support deprecated V4 (crc=0) format"
+ depends on XFS_FS
+ default y
+ help
+ The V4 filesystem format lacks certain features that are supported
+ by the V5 format, such as metadata checksumming, strengthened
+ metadata verification, and the ability to store timestamps past the
+ year 2038. Because of this, the V4 format is deprecated. All users
+ should upgrade by backing up their files, reformatting, and restoring
+ from the backup.
+
+ Administrators and users can detect a V4 filesystem by running
+ xfs_info against a filesystem mountpoint and checking for a string
+ beginning with "crc=". If the string "crc=0" is found, the
+ filesystem is a V4 filesystem. If no such string is found, please
+ upgrade xfsprogs to the latest version and try again.
+
+ This option will become default N in September 2025. Support for the
+ V4 format will be removed entirely in September 2030. Distributors
+ can say N here to withdraw support earlier.
+
+ To continue supporting the old V4 format (crc=0), say Y.
+ To close off an attack surface, say N.
+
config XFS_QUOTA
bool "XFS Quota support"
depends on XFS_FS
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 3f80cede7406..48d8e9caf86f 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -96,8 +96,6 @@ xfs_attr3_rmt_verify(
{
struct xfs_attr3_rmt_hdr *rmt = ptr;
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- return __this_address;
if (!xfs_verify_magic(bp, rmt->rm_magic))
return __this_address;
if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid))
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 1b0a01b06a05..d9a692484eae 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5046,20 +5046,25 @@ xfs_bmap_del_extent_real(
flags = XFS_ILOG_CORE;
if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
- xfs_fsblock_t bno;
xfs_filblks_t len;
xfs_extlen_t mod;
- bno = div_u64_rem(del->br_startblock, mp->m_sb.sb_rextsize,
- &mod);
- ASSERT(mod == 0);
len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize,
&mod);
ASSERT(mod == 0);
- error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
- if (error)
- goto done;
+ if (!(bflags & XFS_BMAPI_REMAP)) {
+ xfs_fsblock_t bno;
+
+ bno = div_u64_rem(del->br_startblock,
+ mp->m_sb.sb_rextsize, &mod);
+ ASSERT(mod == 0);
+
+ error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
+ if (error)
+ goto done;
+ }
+
do_fx = 0;
nblks = len * mp->m_sb.sb_rextsize;
qfield = XFS_TRANS_DQ_RTBCOUNT;
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index b40a4e80f5ee..b876b44c0204 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -15,8 +15,8 @@
*/
#define XFS_DA_NODE_MAGIC 0xfebe /* magic number: non-leaf blocks */
#define XFS_ATTR_LEAF_MAGIC 0xfbee /* magic number: attribute leaf blks */
-#define XFS_DIR2_LEAF1_MAGIC 0xd2f1 /* magic number: v2 dirlf single blks */
-#define XFS_DIR2_LEAFN_MAGIC 0xd2ff /* magic number: v2 dirlf multi blks */
+#define XFS_DIR2_LEAF1_MAGIC 0xd2f1 /* magic number: v2 dirlf single blks */
+#define XFS_DIR2_LEAFN_MAGIC 0xd2ff /* magic number: v2 dirlf multi blks */
typedef struct xfs_da_blkinfo {
__be32 forw; /* previous block in list */
@@ -35,8 +35,8 @@ typedef struct xfs_da_blkinfo {
*/
#define XFS_DA3_NODE_MAGIC 0x3ebe /* magic number: non-leaf blocks */
#define XFS_ATTR3_LEAF_MAGIC 0x3bee /* magic number: attribute leaf blks */
-#define XFS_DIR3_LEAF1_MAGIC 0x3df1 /* magic number: v2 dirlf single blks */
-#define XFS_DIR3_LEAFN_MAGIC 0x3dff /* magic number: v2 dirlf multi blks */
+#define XFS_DIR3_LEAF1_MAGIC 0x3df1 /* magic number: v3 dirlf single blks */
+#define XFS_DIR3_LEAFN_MAGIC 0x3dff /* magic number: v3 dirlf multi blks */
struct xfs_da3_blkinfo {
/*
@@ -61,7 +61,7 @@ struct xfs_da3_blkinfo {
* Since we have duplicate keys, use a binary search but always follow
* all match in the block, not just the first match found.
*/
-#define XFS_DA_NODE_MAXDEPTH 5 /* max depth of Btree */
+#define XFS_DA_NODE_MAXDEPTH 5 /* max depth of Btree */
typedef struct xfs_da_node_hdr {
struct xfs_da_blkinfo info; /* block type, links, etc. */
@@ -746,14 +746,14 @@ xfs_attr3_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx)
*/
static inline int xfs_attr_leaf_entsize_remote(int nlen)
{
- return ((uint)sizeof(xfs_attr_leaf_name_remote_t) - 1 + (nlen) + \
- XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1);
+ return round_up(sizeof(struct xfs_attr_leaf_name_remote) - 1 +
+ nlen, XFS_ATTR_LEAF_NAME_ALIGN);
}
static inline int xfs_attr_leaf_entsize_local(int nlen, int vlen)
{
- return ((uint)sizeof(xfs_attr_leaf_name_local_t) - 1 + (nlen) + (vlen) +
- XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1);
+ return round_up(sizeof(struct xfs_attr_leaf_name_local) - 1 +
+ nlen + vlen, XFS_ATTR_LEAF_NAME_ALIGN);
}
static inline int xfs_attr_leaf_entsize_local_max(int bsize)
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index d8f586256add..eff4a127188e 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -16,6 +16,8 @@
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_trace.h"
+#include "xfs_icache.h"
+#include "xfs_log.h"
/*
* Deferred Operations in XFS
@@ -186,8 +188,9 @@ xfs_defer_create_intent(
{
const struct xfs_defer_op_type *ops = defer_op_types[dfp->dfp_type];
- dfp->dfp_intent = ops->create_intent(tp, &dfp->dfp_work,
- dfp->dfp_count, sort);
+ if (!dfp->dfp_intent)
+ dfp->dfp_intent = ops->create_intent(tp, &dfp->dfp_work,
+ dfp->dfp_count, sort);
}
/*
@@ -312,22 +315,6 @@ xfs_defer_trans_roll(
}
/*
- * Reset an already used dfops after finish.
- */
-static void
-xfs_defer_reset(
- struct xfs_trans *tp)
-{
- ASSERT(list_empty(&tp->t_dfops));
-
- /*
- * Low mode state transfers across transaction rolls to mirror dfops
- * lifetime. Clear it now that dfops is reset.
- */
- tp->t_flags &= ~XFS_TRANS_LOWMODE;
-}
-
-/*
* Free up any items left in the list.
*/
static void
@@ -360,6 +347,58 @@ xfs_defer_cancel_list(
}
/*
+ * Prevent a log intent item from pinning the tail of the log by logging a
+ * done item to release the intent item; and then log a new intent item.
+ * The caller should provide a fresh transaction and roll it after we're done.
+ */
+static int
+xfs_defer_relog(
+ struct xfs_trans **tpp,
+ struct list_head *dfops)
+{
+ struct xlog *log = (*tpp)->t_mountp->m_log;
+ struct xfs_defer_pending *dfp;
+ xfs_lsn_t threshold_lsn = NULLCOMMITLSN;
+
+
+ ASSERT((*tpp)->t_flags & XFS_TRANS_PERM_LOG_RES);
+
+ list_for_each_entry(dfp, dfops, dfp_list) {
+ /*
+ * If the log intent item for this deferred op is not a part of
+ * the current log checkpoint, relog the intent item to keep
+ * the log tail moving forward. We're ok with this being racy
+ * because an incorrect decision means we'll be a little slower
+ * at pushing the tail.
+ */
+ if (dfp->dfp_intent == NULL ||
+ xfs_log_item_in_current_chkpt(dfp->dfp_intent))
+ continue;
+
+ /*
+ * Figure out where we need the tail to be in order to maintain
+ * the minimum required free space in the log. Only sample
+ * the log threshold once per call.
+ */
+ if (threshold_lsn == NULLCOMMITLSN) {
+ threshold_lsn = xlog_grant_push_threshold(log, 0);
+ if (threshold_lsn == NULLCOMMITLSN)
+ break;
+ }
+ if (XFS_LSN_CMP(dfp->dfp_intent->li_lsn, threshold_lsn) >= 0)
+ continue;
+
+ trace_xfs_defer_relog_intent((*tpp)->t_mountp, dfp);
+ XFS_STATS_INC((*tpp)->t_mountp, defer_relog);
+ dfp->dfp_intent = xfs_trans_item_relog(dfp->dfp_intent, *tpp);
+ }
+
+ if ((*tpp)->t_flags & XFS_TRANS_DIRTY)
+ return xfs_defer_trans_roll(tpp);
+ return 0;
+}
+
+/*
* Log an intent-done item for the first pending intent, and finish the work
* items.
*/
@@ -390,6 +429,7 @@ xfs_defer_finish_one(
list_add(li, &dfp->dfp_work);
dfp->dfp_count++;
dfp->dfp_done = NULL;
+ dfp->dfp_intent = NULL;
xfs_defer_create_intent(tp, dfp, false);
}
@@ -428,13 +468,27 @@ xfs_defer_finish_noroll(
/* Until we run out of pending work to finish... */
while (!list_empty(&dop_pending) || !list_empty(&(*tp)->t_dfops)) {
+ /*
+ * Deferred items that are created in the process of finishing
+ * other deferred work items should be queued at the head of
+ * the pending list, which puts them ahead of the deferred work
+ * that was created by the caller. This keeps the number of
+ * pending work items to a minimum, which decreases the amount
+ * of time that any one intent item can stick around in memory,
+ * pinning the log tail.
+ */
xfs_defer_create_intents(*tp);
- list_splice_tail_init(&(*tp)->t_dfops, &dop_pending);
+ list_splice_init(&(*tp)->t_dfops, &dop_pending);
error = xfs_defer_trans_roll(tp);
if (error)
goto out_shutdown;
+ /* Possibly relog intent items to keep the log moving. */
+ error = xfs_defer_relog(tp, &dop_pending);
+ if (error)
+ goto out_shutdown;
+
dfp = list_first_entry(&dop_pending, struct xfs_defer_pending,
dfp_list);
error = xfs_defer_finish_one(*tp, dfp);
@@ -475,7 +529,10 @@ xfs_defer_finish(
return error;
}
}
- xfs_defer_reset(*tp);
+
+ /* Reset LOWMODE now that we've finished all the dfops. */
+ ASSERT(list_empty(&(*tp)->t_dfops));
+ (*tp)->t_flags &= ~XFS_TRANS_LOWMODE;
return 0;
}
@@ -549,6 +606,139 @@ xfs_defer_move(
* that behavior.
*/
dtp->t_flags |= (stp->t_flags & XFS_TRANS_LOWMODE);
+ stp->t_flags &= ~XFS_TRANS_LOWMODE;
+}
+
+/*
+ * Prepare a chain of fresh deferred ops work items to be completed later. Log
+ * recovery requires the ability to put off until later the actual finishing
+ * work so that it can process unfinished items recovered from the log in
+ * correct order.
+ *
+ * Create and log intent items for all the work that we're capturing so that we
+ * can be assured that the items will get replayed if the system goes down
+ * before log recovery gets a chance to finish the work it put off. The entire
+ * deferred ops state is transferred to the capture structure and the
+ * transaction is then ready for the caller to commit it. If there are no
+ * intent items to capture, this function returns NULL.
+ *
+ * If capture_ip is not NULL, the capture structure will obtain an extra
+ * reference to the inode.
+ */
+static struct xfs_defer_capture *
+xfs_defer_ops_capture(
+ struct xfs_trans *tp,
+ struct xfs_inode *capture_ip)
+{
+ struct xfs_defer_capture *dfc;
+
+ if (list_empty(&tp->t_dfops))
+ return NULL;
+
+ /* Create an object to capture the defer ops. */
+ dfc = kmem_zalloc(sizeof(*dfc), KM_NOFS);
+ INIT_LIST_HEAD(&dfc->dfc_list);
+ INIT_LIST_HEAD(&dfc->dfc_dfops);
+
+ xfs_defer_create_intents(tp);
+
+ /* Move the dfops chain and transaction state to the capture struct. */
+ list_splice_init(&tp->t_dfops, &dfc->dfc_dfops);
+ dfc->dfc_tpflags = tp->t_flags & XFS_TRANS_LOWMODE;
+ tp->t_flags &= ~XFS_TRANS_LOWMODE;
+
+ /* Capture the remaining block reservations along with the dfops. */
+ dfc->dfc_blkres = tp->t_blk_res - tp->t_blk_res_used;
+ dfc->dfc_rtxres = tp->t_rtx_res - tp->t_rtx_res_used;
+
+ /* Preserve the log reservation size. */
+ dfc->dfc_logres = tp->t_log_res;
+
+ /*
+ * Grab an extra reference to this inode and attach it to the capture
+ * structure.
+ */
+ if (capture_ip) {
+ ihold(VFS_I(capture_ip));
+ dfc->dfc_capture_ip = capture_ip;
+ }
+
+ return dfc;
+}
+
+/* Release all resources that we used to capture deferred ops. */
+void
+xfs_defer_ops_release(
+ struct xfs_mount *mp,
+ struct xfs_defer_capture *dfc)
+{
+ xfs_defer_cancel_list(mp, &dfc->dfc_dfops);
+ if (dfc->dfc_capture_ip)
+ xfs_irele(dfc->dfc_capture_ip);
+ kmem_free(dfc);
+}
+
+/*
+ * Capture any deferred ops and commit the transaction. This is the last step
+ * needed to finish a log intent item that we recovered from the log. If any
+ * of the deferred ops operate on an inode, the caller must pass in that inode
+ * so that the reference can be transferred to the capture structure. The
+ * caller must hold ILOCK_EXCL on the inode, and must unlock it before calling
+ * xfs_defer_ops_continue.
+ */
+int
+xfs_defer_ops_capture_and_commit(
+ struct xfs_trans *tp,
+ struct xfs_inode *capture_ip,
+ struct list_head *capture_list)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_defer_capture *dfc;
+ int error;
+
+ ASSERT(!capture_ip || xfs_isilocked(capture_ip, XFS_ILOCK_EXCL));
+
+ /* If we don't capture anything, commit transaction and exit. */
+ dfc = xfs_defer_ops_capture(tp, capture_ip);
+ if (!dfc)
+ return xfs_trans_commit(tp);
+
+ /* Commit the transaction and add the capture structure to the list. */
+ error = xfs_trans_commit(tp);
+ if (error) {
+ xfs_defer_ops_release(mp, dfc);
+ return error;
+ }
+
+ list_add_tail(&dfc->dfc_list, capture_list);
+ return 0;
+}
+
+/*
+ * Attach a chain of captured deferred ops to a new transaction and free the
+ * capture structure. If an inode was captured, it will be passed back to the
+ * caller with ILOCK_EXCL held and joined to the transaction with lockflags==0.
+ * The caller now owns the inode reference.
+ */
+void
+xfs_defer_ops_continue(
+ struct xfs_defer_capture *dfc,
+ struct xfs_trans *tp,
+ struct xfs_inode **captured_ipp)
+{
+ ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
+ ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));
+
+ /* Lock and join the captured inode to the new transaction. */
+ if (dfc->dfc_capture_ip) {
+ xfs_ilock(dfc->dfc_capture_ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, dfc->dfc_capture_ip, 0);
+ }
+ *captured_ipp = dfc->dfc_capture_ip;
+
+ /* Move captured dfops chain and state to the transaction. */
+ list_splice_init(&dfc->dfc_dfops, &tp->t_dfops);
+ tp->t_flags |= dfc->dfc_tpflags;
- xfs_defer_reset(stp);
+ kmem_free(dfc);
}
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 6b2ca580f2b0..05472f71fffe 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -8,6 +8,7 @@
struct xfs_btree_cur;
struct xfs_defer_op_type;
+struct xfs_defer_capture;
/*
* Header for deferred operation list.
@@ -63,4 +64,40 @@ extern const struct xfs_defer_op_type xfs_rmap_update_defer_type;
extern const struct xfs_defer_op_type xfs_extent_free_defer_type;
extern const struct xfs_defer_op_type xfs_agfl_free_defer_type;
+/*
+ * This structure enables a dfops user to detach the chain of deferred
+ * operations from a transaction so that they can be continued later.
+ */
+struct xfs_defer_capture {
+ /* List of other capture structures. */
+ struct list_head dfc_list;
+
+ /* Deferred ops state saved from the transaction. */
+ struct list_head dfc_dfops;
+ unsigned int dfc_tpflags;
+
+ /* Block reservations for the data and rt devices. */
+ unsigned int dfc_blkres;
+ unsigned int dfc_rtxres;
+
+ /* Log reservation saved from the transaction. */
+ unsigned int dfc_logres;
+
+ /*
+ * An inode reference that must be maintained to complete the deferred
+ * work.
+ */
+ struct xfs_inode *dfc_capture_ip;
+};
+
+/*
+ * Functions to capture a chain of deferred operations and continue them later.
+ * This doesn't normally happen except log recovery.
+ */
+int xfs_defer_ops_capture_and_commit(struct xfs_trans *tp,
+ struct xfs_inode *capture_ip, struct list_head *capture_list);
+void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp,
+ struct xfs_inode **captured_ipp);
+void xfs_defer_ops_release(struct xfs_mount *mp, struct xfs_defer_capture *d);
+
#endif /* __XFS_DEFER_H__ */
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 536666143fe7..ef5eaf33d146 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -17,7 +17,7 @@ struct xfs_dinode;
*/
struct xfs_icdinode {
uint16_t di_flushiter; /* incremented on flush */
- uint32_t di_projid; /* owner's project id */
+ prid_t di_projid; /* owner's project id */
xfs_fsize_t di_size; /* number of bytes in file */
xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */
xfs_extlen_t di_extsize; /* basic/minimum extent size for file */
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 27c39268c31f..340c83f76c80 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -2505,12 +2505,15 @@ xfs_rmap_map_extent(
int whichfork,
struct xfs_bmbt_irec *PREV)
{
+ enum xfs_rmap_intent_type type = XFS_RMAP_MAP;
+
if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork))
return;
- __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ?
- XFS_RMAP_MAP_SHARED : XFS_RMAP_MAP, ip->i_ino,
- whichfork, PREV);
+ if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip))
+ type = XFS_RMAP_MAP_SHARED;
+
+ __xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV);
}
/* Unmap an extent out of a file. */
@@ -2521,12 +2524,15 @@ xfs_rmap_unmap_extent(
int whichfork,
struct xfs_bmbt_irec *PREV)
{
+ enum xfs_rmap_intent_type type = XFS_RMAP_UNMAP;
+
if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork))
return;
- __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ?
- XFS_RMAP_UNMAP_SHARED : XFS_RMAP_UNMAP, ip->i_ino,
- whichfork, PREV);
+ if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip))
+ type = XFS_RMAP_UNMAP_SHARED;
+
+ __xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV);
}
/*
@@ -2543,12 +2549,15 @@ xfs_rmap_convert_extent(
int whichfork,
struct xfs_bmbt_irec *PREV)
{
+ enum xfs_rmap_intent_type type = XFS_RMAP_CONVERT;
+
if (!xfs_rmap_update_is_needed(mp, whichfork))
return;
- __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ?
- XFS_RMAP_CONVERT_SHARED : XFS_RMAP_CONVERT, ip->i_ino,
- whichfork, PREV);
+ if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip))
+ type = XFS_RMAP_CONVERT_SHARED;
+
+ __xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV);
}
/* Schedule the creation of an rmap for non-file data. */
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index 1d9fa8a300f1..6c1aba16113c 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -1018,7 +1018,6 @@ xfs_rtalloc_query_range(
struct xfs_mount *mp = tp->t_mountp;
xfs_rtblock_t rtstart;
xfs_rtblock_t rtend;
- xfs_rtblock_t rem;
int is_free;
int error = 0;
@@ -1027,13 +1026,12 @@ xfs_rtalloc_query_range(
if (low_rec->ar_startext >= mp->m_sb.sb_rextents ||
low_rec->ar_startext == high_rec->ar_startext)
return 0;
- if (high_rec->ar_startext > mp->m_sb.sb_rextents)
- high_rec->ar_startext = mp->m_sb.sb_rextents;
+ high_rec->ar_startext = min(high_rec->ar_startext,
+ mp->m_sb.sb_rextents - 1);
/* Iterate the bitmap, looking for discrepancies. */
rtstart = low_rec->ar_startext;
- rem = high_rec->ar_startext - rtstart;
- while (rem) {
+ while (rtstart <= high_rec->ar_startext) {
/* Is the first block free? */
error = xfs_rtcheck_range(mp, tp, rtstart, 1, 1, &rtend,
&is_free);
@@ -1042,7 +1040,7 @@ xfs_rtalloc_query_range(
/* How long does the extent go for? */
error = xfs_rtfind_forw(mp, tp, rtstart,
- high_rec->ar_startext - 1, &rtend);
+ high_rec->ar_startext, &rtend);
if (error)
break;
@@ -1055,7 +1053,6 @@ xfs_rtalloc_query_range(
break;
}
- rem -= rtend - rtstart + 1;
rtstart = rtend + 1;
}
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
index e56786f0a13c..653f3280e1c1 100644
--- a/fs/xfs/scrub/dabtree.c
+++ b/fs/xfs/scrub/dabtree.c
@@ -441,6 +441,20 @@ xchk_da_btree_block(
goto out_freebp;
}
+ /*
+ * If we've been handed a block that is below the dabtree root, does
+ * its hashval match what the parent block expected to see?
+ */
+ if (level > 0) {
+ struct xfs_da_node_entry *key;
+
+ key = xchk_da_btree_node_entry(ds, level - 1);
+ if (be32_to_cpu(key->hashval) != blk->hashval) {
+ xchk_da_set_corrupt(ds, level);
+ goto out_freebp;
+ }
+ }
+
out:
return error;
out_freebp:
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index ec3691372e7c..9e16a4d0f97c 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -24,6 +24,7 @@
#include "xfs_error.h"
#include "xfs_log_priv.h"
#include "xfs_log_recover.h"
+#include "xfs_quota.h"
kmem_zone_t *xfs_bui_zone;
kmem_zone_t *xfs_bud_zone;
@@ -423,30 +424,26 @@ const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
STATIC int
xfs_bui_item_recover(
struct xfs_log_item *lip,
- struct xfs_trans *parent_tp)
+ struct list_head *capture_list)
{
struct xfs_bmbt_irec irec;
struct xfs_bui_log_item *buip = BUI_ITEM(lip);
struct xfs_trans *tp;
struct xfs_inode *ip = NULL;
- struct xfs_mount *mp = parent_tp->t_mountp;
+ struct xfs_mount *mp = lip->li_mountp;
struct xfs_map_extent *bmap;
struct xfs_bud_log_item *budp;
xfs_fsblock_t startblock_fsb;
xfs_fsblock_t inode_fsb;
xfs_filblks_t count;
xfs_exntst_t state;
- enum xfs_bmap_intent_type type;
- bool op_ok;
unsigned int bui_type;
int whichfork;
int error = 0;
/* Only one mapping operation per BUI... */
- if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
- xfs_bui_release(buip);
+ if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS)
return -EFSCORRUPTED;
- }
/*
* First check the validity of the extent described by the
@@ -457,76 +454,58 @@ xfs_bui_item_recover(
XFS_FSB_TO_DADDR(mp, bmap->me_startblock));
inode_fsb = XFS_BB_TO_FSB(mp, XFS_FSB_TO_DADDR(mp,
XFS_INO_TO_FSB(mp, bmap->me_owner)));
- switch (bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK) {
+ state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
+ XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
+ whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ?
+ XFS_ATTR_FORK : XFS_DATA_FORK;
+ bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
+ switch (bui_type) {
case XFS_BMAP_MAP:
case XFS_BMAP_UNMAP:
- op_ok = true;
break;
default:
- op_ok = false;
- break;
+ return -EFSCORRUPTED;
}
- if (!op_ok || startblock_fsb == 0 ||
+ if (startblock_fsb == 0 ||
bmap->me_len == 0 ||
inode_fsb == 0 ||
startblock_fsb >= mp->m_sb.sb_dblocks ||
bmap->me_len >= mp->m_sb.sb_agblocks ||
inode_fsb >= mp->m_sb.sb_dblocks ||
- (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS)) {
- /*
- * This will pull the BUI from the AIL and
- * free the memory associated with it.
- */
- xfs_bui_release(buip);
+ (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS))
return -EFSCORRUPTED;
- }
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
- XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
+ /* Grab the inode. */
+ error = xfs_iget(mp, NULL, bmap->me_owner, 0, 0, &ip);
if (error)
return error;
- /*
- * Recovery stashes all deferred ops during intent processing and
- * finishes them on completion. Transfer current dfops state to this
- * transaction and transfer the result back before we return.
- */
- xfs_defer_move(tp, parent_tp);
- budp = xfs_trans_get_bud(tp, buip);
- /* Grab the inode. */
- error = xfs_iget(mp, tp, bmap->me_owner, 0, XFS_ILOCK_EXCL, &ip);
+ error = xfs_qm_dqattach(ip);
if (error)
- goto err_inode;
+ goto err_rele;
if (VFS_I(ip)->i_nlink == 0)
xfs_iflags_set(ip, XFS_IRECOVERY);
- /* Process deferred bmap item. */
- state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
- XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
- whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ?
- XFS_ATTR_FORK : XFS_DATA_FORK;
- bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
- switch (bui_type) {
- case XFS_BMAP_MAP:
- case XFS_BMAP_UNMAP:
- type = bui_type;
- break;
- default:
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
- error = -EFSCORRUPTED;
- goto err_inode;
- }
+ /* Allocate transaction and do the work. */
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
+ XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
+ if (error)
+ goto err_rele;
+
+ budp = xfs_trans_get_bud(tp, buip);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
count = bmap->me_len;
- error = xfs_trans_log_finish_bmap_update(tp, budp, type, ip, whichfork,
- bmap->me_startoff, bmap->me_startblock, &count, state);
+ error = xfs_trans_log_finish_bmap_update(tp, budp, bui_type, ip,
+ whichfork, bmap->me_startoff, bmap->me_startblock,
+ &count, state);
if (error)
- goto err_inode;
+ goto err_cancel;
if (count > 0) {
- ASSERT(type == XFS_BMAP_UNMAP);
+ ASSERT(bui_type == XFS_BMAP_UNMAP);
irec.br_startblock = bmap->me_startblock;
irec.br_blockcount = count;
irec.br_startoff = bmap->me_startoff;
@@ -534,20 +513,24 @@ xfs_bui_item_recover(
xfs_bmap_unmap_extent(tp, ip, &irec);
}
- xfs_defer_move(parent_tp, tp);
- error = xfs_trans_commit(tp);
+ /*
+ * Commit transaction, which frees the transaction and saves the inode
+ * for later replay activities.
+ */
+ error = xfs_defer_ops_capture_and_commit(tp, ip, capture_list);
+ if (error)
+ goto err_unlock;
+
xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_irele(ip);
+ return 0;
- return error;
-
-err_inode:
- xfs_defer_move(parent_tp, tp);
+err_cancel:
xfs_trans_cancel(tp);
- if (ip) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_irele(ip);
- }
+err_unlock:
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+err_rele:
+ xfs_irele(ip);
return error;
}
@@ -559,6 +542,32 @@ xfs_bui_item_match(
return BUI_ITEM(lip)->bui_format.bui_id == intent_id;
}
+/* Relog an intent item to push the log tail forward. */
+static struct xfs_log_item *
+xfs_bui_item_relog(
+ struct xfs_log_item *intent,
+ struct xfs_trans *tp)
+{
+ struct xfs_bud_log_item *budp;
+ struct xfs_bui_log_item *buip;
+ struct xfs_map_extent *extp;
+ unsigned int count;
+
+ count = BUI_ITEM(intent)->bui_format.bui_nextents;
+ extp = BUI_ITEM(intent)->bui_format.bui_extents;
+
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ budp = xfs_trans_get_bud(tp, BUI_ITEM(intent));
+ set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags);
+
+ buip = xfs_bui_init(tp->t_mountp);
+ memcpy(buip->bui_format.bui_extents, extp, count * sizeof(*extp));
+ atomic_set(&buip->bui_next_extent, count);
+ xfs_trans_add_item(tp, &buip->bui_item);
+ set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags);
+ return &buip->bui_item;
+}
+
static const struct xfs_item_ops xfs_bui_item_ops = {
.iop_size = xfs_bui_item_size,
.iop_format = xfs_bui_item_format,
@@ -566,6 +575,7 @@ static const struct xfs_item_ops xfs_bui_item_ops = {
.iop_release = xfs_bui_item_release,
.iop_recover = xfs_bui_item_recover,
.iop_match = xfs_bui_item_match,
+ .iop_relog = xfs_bui_item_relog,
};
/*
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index 24c7a8d11e1a..d44e8b4a3391 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -719,6 +719,8 @@ xlog_recover_get_buf_lsn(
case XFS_ABTC_MAGIC:
case XFS_RMAP_CRC_MAGIC:
case XFS_REFC_CRC_MAGIC:
+ case XFS_FIBT_CRC_MAGIC:
+ case XFS_FIBT_MAGIC:
case XFS_IBT_CRC_MAGIC:
case XFS_IBT_MAGIC: {
struct xfs_btree_block *btb = blk;
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 3072814e407d..1d95ed387d66 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -831,8 +831,8 @@ xfs_qm_dqget_checks(
}
/*
- * Given the file system, id, and type (UDQUOT/GDQUOT), return a locked
- * dquot, doing an allocation (if requested) as needed.
+ * Given the file system, id, and type (UDQUOT/GDQUOT/PDQUOT), return a
+ * locked dquot, doing an allocation (if requested) as needed.
*/
int
xfs_qm_dqget(
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 6cb8cd11072a..6c11bfc3d452 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -585,10 +585,10 @@ const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
STATIC int
xfs_efi_item_recover(
struct xfs_log_item *lip,
- struct xfs_trans *parent_tp)
+ struct list_head *capture_list)
{
struct xfs_efi_log_item *efip = EFI_ITEM(lip);
- struct xfs_mount *mp = parent_tp->t_mountp;
+ struct xfs_mount *mp = lip->li_mountp;
struct xfs_efd_log_item *efdp;
struct xfs_trans *tp;
struct xfs_extent *extp;
@@ -608,14 +608,8 @@ xfs_efi_item_recover(
if (startblock_fsb == 0 ||
extp->ext_len == 0 ||
startblock_fsb >= mp->m_sb.sb_dblocks ||
- extp->ext_len >= mp->m_sb.sb_agblocks) {
- /*
- * This will pull the EFI from the AIL and
- * free the memory associated with it.
- */
- xfs_efi_release(efip);
+ extp->ext_len >= mp->m_sb.sb_agblocks)
return -EFSCORRUPTED;
- }
}
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
@@ -633,8 +627,7 @@ xfs_efi_item_recover(
}
- error = xfs_trans_commit(tp);
- return error;
+ return xfs_defer_ops_capture_and_commit(tp, NULL, capture_list);
abort_error:
xfs_trans_cancel(tp);
@@ -649,6 +642,34 @@ xfs_efi_item_match(
return EFI_ITEM(lip)->efi_format.efi_id == intent_id;
}
+/* Relog an intent item to push the log tail forward. */
+static struct xfs_log_item *
+xfs_efi_item_relog(
+ struct xfs_log_item *intent,
+ struct xfs_trans *tp)
+{
+ struct xfs_efd_log_item *efdp;
+ struct xfs_efi_log_item *efip;
+ struct xfs_extent *extp;
+ unsigned int count;
+
+ count = EFI_ITEM(intent)->efi_format.efi_nextents;
+ extp = EFI_ITEM(intent)->efi_format.efi_extents;
+
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ efdp = xfs_trans_get_efd(tp, EFI_ITEM(intent), count);
+ efdp->efd_next_extent = count;
+ memcpy(efdp->efd_format.efd_extents, extp, count * sizeof(*extp));
+ set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
+
+ efip = xfs_efi_init(tp->t_mountp, count);
+ memcpy(efip->efi_format.efi_extents, extp, count * sizeof(*extp));
+ atomic_set(&efip->efi_next_extent, count);
+ xfs_trans_add_item(tp, &efip->efi_item);
+ set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags);
+ return &efip->efi_item;
+}
+
static const struct xfs_item_ops xfs_efi_item_ops = {
.iop_size = xfs_efi_item_size,
.iop_format = xfs_efi_item_format,
@@ -656,6 +677,7 @@ static const struct xfs_item_ops xfs_efi_item_ops = {
.iop_release = xfs_efi_item_release,
.iop_recover = xfs_efi_item_recover,
.iop_match = xfs_efi_item_match,
+ .iop_relog = xfs_efi_item_relog,
};
/*
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 1a88025e68a3..db23e455eb91 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -33,39 +33,7 @@ enum xfs_fstrm_alloc {
/*
* Allocation group filestream associations are tracked with per-ag atomic
* counters. These counters allow xfs_filestream_pick_ag() to tell whether a
- * particular AG already has active filestreams associated with it. The mount
- * point's m_peraglock is used to protect these counters from per-ag array
- * re-allocation during a growfs operation. When xfs_growfs_data_private() is
- * about to reallocate the array, it calls xfs_filestream_flush() with the
- * m_peraglock held in write mode.
- *
- * Since xfs_mru_cache_flush() guarantees that all the free functions for all
- * the cache elements have finished executing before it returns, it's safe for
- * the free functions to use the atomic counters without m_peraglock protection.
- * This allows the implementation of xfs_fstrm_free_func() to be agnostic about
- * whether it was called with the m_peraglock held in read mode, write mode or
- * not held at all. The race condition this addresses is the following:
- *
- * - The work queue scheduler fires and pulls a filestream directory cache
- * element off the LRU end of the cache for deletion, then gets pre-empted.
- * - A growfs operation grabs the m_peraglock in write mode, flushes all the
- * remaining items from the cache and reallocates the mount point's per-ag
- * array, resetting all the counters to zero.
- * - The work queue thread resumes and calls the free function for the element
- * it started cleaning up earlier. In the process it decrements the
- * filestreams counter for an AG that now has no references.
- *
- * With a shrinkfs feature, the above scenario could panic the system.
- *
- * All other uses of the following macros should be protected by either the
- * m_peraglock held in read mode, or the cache's internal locking exposed by the
- * interval between a call to xfs_mru_cache_lookup() and a call to
- * xfs_mru_cache_done(). In addition, the m_peraglock must be held in read mode
- * when new elements are added to the cache.
- *
- * Combined, these locking rules ensure that no associations will ever exist in
- * the cache that reference per-ag array elements that have since been
- * reallocated.
+ * particular AG already has active filestreams associated with it.
*/
int
xfs_filestream_peek_ag(
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 4eebcec4aae6..9ce5e7d5bf8f 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -26,7 +26,7 @@
#include "xfs_rtalloc.h"
/* Convert an xfs_fsmap to an fsmap. */
-void
+static void
xfs_fsmap_from_internal(
struct fsmap *dest,
struct xfs_fsmap *src)
@@ -155,8 +155,7 @@ xfs_fsmap_owner_from_rmap(
/* getfsmap query state */
struct xfs_getfsmap_info {
struct xfs_fsmap_head *head;
- xfs_fsmap_format_t formatter; /* formatting fn */
- void *format_arg; /* format buffer */
+ struct fsmap *fsmap_recs; /* mapping records */
struct xfs_buf *agf_bp; /* AGF, for refcount queries */
xfs_daddr_t next_daddr; /* next daddr we expect */
u64 missing_owner; /* owner of holes */
@@ -224,6 +223,20 @@ xfs_getfsmap_is_shared(
return 0;
}
+static inline void
+xfs_getfsmap_format(
+ struct xfs_mount *mp,
+ struct xfs_fsmap *xfm,
+ struct xfs_getfsmap_info *info)
+{
+ struct fsmap *rec;
+
+ trace_xfs_getfsmap_mapping(mp, xfm);
+
+ rec = &info->fsmap_recs[info->head->fmh_entries++];
+ xfs_fsmap_from_internal(rec, xfm);
+}
+
/*
* Format a reverse mapping for getfsmap, having translated rm_startblock
* into the appropriate daddr units.
@@ -256,6 +269,9 @@ xfs_getfsmap_helper(
/* Are we just counting mappings? */
if (info->head->fmh_count == 0) {
+ if (info->head->fmh_entries == UINT_MAX)
+ return -ECANCELED;
+
if (rec_daddr > info->next_daddr)
info->head->fmh_entries++;
@@ -285,10 +301,7 @@ xfs_getfsmap_helper(
fmr.fmr_offset = 0;
fmr.fmr_length = rec_daddr - info->next_daddr;
fmr.fmr_flags = FMR_OF_SPECIAL_OWNER;
- error = info->formatter(&fmr, info->format_arg);
- if (error)
- return error;
- info->head->fmh_entries++;
+ xfs_getfsmap_format(mp, &fmr, info);
}
if (info->last)
@@ -320,11 +333,8 @@ xfs_getfsmap_helper(
if (shared)
fmr.fmr_flags |= FMR_OF_SHARED;
}
- error = info->formatter(&fmr, info->format_arg);
- if (error)
- return error;
- info->head->fmh_entries++;
+ xfs_getfsmap_format(mp, &fmr, info);
out:
rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
if (info->next_daddr < rec_daddr)
@@ -792,11 +802,11 @@ xfs_getfsmap_check_keys(
#endif /* CONFIG_XFS_RT */
/*
- * Get filesystem's extents as described in head, and format for
- * output. Calls formatter to fill the user's buffer until all
- * extents are mapped, until the passed-in head->fmh_count slots have
- * been filled, or until the formatter short-circuits the loop, if it
- * is tracking filled-in extents on its own.
+ * Get filesystem's extents as described in head, and format for output. Fills
+ * in the supplied records array until there are no more reverse mappings to
+ * return or head.fmh_entries == head.fmh_count. In the second case, this
+ * function returns -ECANCELED to indicate that more records would have been
+ * returned.
*
* Key to Confusion
* ----------------
@@ -816,8 +826,7 @@ int
xfs_getfsmap(
struct xfs_mount *mp,
struct xfs_fsmap_head *head,
- xfs_fsmap_format_t formatter,
- void *arg)
+ struct fsmap *fsmap_recs)
{
struct xfs_trans *tp = NULL;
struct xfs_fsmap dkeys[2]; /* per-dev keys */
@@ -892,8 +901,7 @@ xfs_getfsmap(
info.next_daddr = head->fmh_keys[0].fmr_physical +
head->fmh_keys[0].fmr_length;
- info.formatter = formatter;
- info.format_arg = arg;
+ info.fsmap_recs = fsmap_recs;
info.head = head;
/*
diff --git a/fs/xfs/xfs_fsmap.h b/fs/xfs/xfs_fsmap.h
index c6c57739b862..a0775788e7b1 100644
--- a/fs/xfs/xfs_fsmap.h
+++ b/fs/xfs/xfs_fsmap.h
@@ -27,13 +27,9 @@ struct xfs_fsmap_head {
struct xfs_fsmap fmh_keys[2]; /* low and high keys */
};
-void xfs_fsmap_from_internal(struct fsmap *dest, struct xfs_fsmap *src);
void xfs_fsmap_to_internal(struct xfs_fsmap *dest, struct fsmap *src);
-/* fsmap to userspace formatter - copy to user & advance pointer */
-typedef int (*xfs_fsmap_format_t)(struct xfs_fsmap *, void *);
-
int xfs_getfsmap(struct xfs_mount *mp, struct xfs_fsmap_head *head,
- xfs_fsmap_format_t formatter, void *arg);
+ struct fsmap *out_recs);
#endif /* __XFS_FSMAP_H__ */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 49624973eecc..2bfbcf28b1bd 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -698,6 +698,68 @@ out_unlock:
return error;
}
+/* Propagate di_flags from a parent inode to a child inode. */
+static void
+xfs_inode_inherit_flags(
+ struct xfs_inode *ip,
+ const struct xfs_inode *pip)
+{
+ unsigned int di_flags = 0;
+ umode_t mode = VFS_I(ip)->i_mode;
+
+ if (S_ISDIR(mode)) {
+ if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
+ di_flags |= XFS_DIFLAG_RTINHERIT;
+ if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
+ di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+ ip->i_d.di_extsize = pip->i_d.di_extsize;
+ }
+ if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
+ di_flags |= XFS_DIFLAG_PROJINHERIT;
+ } else if (S_ISREG(mode)) {
+ if ((pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) &&
+ xfs_sb_version_hasrealtime(&ip->i_mount->m_sb))
+ di_flags |= XFS_DIFLAG_REALTIME;
+ if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
+ di_flags |= XFS_DIFLAG_EXTSIZE;
+ ip->i_d.di_extsize = pip->i_d.di_extsize;
+ }
+ }
+ if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
+ xfs_inherit_noatime)
+ di_flags |= XFS_DIFLAG_NOATIME;
+ if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) &&
+ xfs_inherit_nodump)
+ di_flags |= XFS_DIFLAG_NODUMP;
+ if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) &&
+ xfs_inherit_sync)
+ di_flags |= XFS_DIFLAG_SYNC;
+ if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) &&
+ xfs_inherit_nosymlinks)
+ di_flags |= XFS_DIFLAG_NOSYMLINKS;
+ if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
+ xfs_inherit_nodefrag)
+ di_flags |= XFS_DIFLAG_NODEFRAG;
+ if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
+ di_flags |= XFS_DIFLAG_FILESTREAM;
+
+ ip->i_d.di_flags |= di_flags;
+}
+
+/* Propagate di_flags2 from a parent inode to a child inode. */
+static void
+xfs_inode_inherit_flags2(
+ struct xfs_inode *ip,
+ const struct xfs_inode *pip)
+{
+ if (pip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) {
+ ip->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
+ ip->i_d.di_cowextsize = pip->i_d.di_cowextsize;
+ }
+ if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
+ ip->i_d.di_flags2 |= XFS_DIFLAG2_DAX;
+}
+
/*
* Allocate an inode on disk and return a copy of its in-core version.
* The in-core inode is locked exclusively. Set mode, nlink, and rdev
@@ -841,54 +903,10 @@ xfs_ialloc(
break;
case S_IFREG:
case S_IFDIR:
- if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
- uint di_flags = 0;
-
- if (S_ISDIR(mode)) {
- if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
- di_flags |= XFS_DIFLAG_RTINHERIT;
- if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
- di_flags |= XFS_DIFLAG_EXTSZINHERIT;
- ip->i_d.di_extsize = pip->i_d.di_extsize;
- }
- if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
- di_flags |= XFS_DIFLAG_PROJINHERIT;
- } else if (S_ISREG(mode)) {
- if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
- di_flags |= XFS_DIFLAG_REALTIME;
- if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
- di_flags |= XFS_DIFLAG_EXTSIZE;
- ip->i_d.di_extsize = pip->i_d.di_extsize;
- }
- }
- if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
- xfs_inherit_noatime)
- di_flags |= XFS_DIFLAG_NOATIME;
- if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) &&
- xfs_inherit_nodump)
- di_flags |= XFS_DIFLAG_NODUMP;
- if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) &&
- xfs_inherit_sync)
- di_flags |= XFS_DIFLAG_SYNC;
- if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) &&
- xfs_inherit_nosymlinks)
- di_flags |= XFS_DIFLAG_NOSYMLINKS;
- if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
- xfs_inherit_nodefrag)
- di_flags |= XFS_DIFLAG_NODEFRAG;
- if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
- di_flags |= XFS_DIFLAG_FILESTREAM;
-
- ip->i_d.di_flags |= di_flags;
- }
- if (pip && (pip->i_d.di_flags2 & XFS_DIFLAG2_ANY)) {
- if (pip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) {
- ip->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
- ip->i_d.di_cowextsize = pip->i_d.di_cowextsize;
- }
- if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
- ip->i_d.di_flags2 |= XFS_DIFLAG2_DAX;
- }
+ if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY))
+ xfs_inode_inherit_flags(ip, pip);
+ if (pip && (pip->i_d.di_flags2 & XFS_DIFLAG2_ANY))
+ xfs_inode_inherit_flags2(ip, pip);
/* FALLTHROUGH */
case S_IFLNK:
ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
@@ -1516,17 +1534,10 @@ xfs_itruncate_extents_flags(
if (error)
goto out;
- /*
- * Duplicate the transaction that has the permanent
- * reservation and commit the old transaction.
- */
+ /* free the just unmapped extents */
error = xfs_defer_finish(&tp);
if (error)
goto out;
-
- error = xfs_trans_roll_inode(&tp, ip);
- if (error)
- goto out;
}
if (whichfork == XFS_DATA_FORK) {
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index bca7659fb5c6..3fbd98f61ea5 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1716,39 +1716,17 @@ out_free_buf:
return error;
}
-struct getfsmap_info {
- struct xfs_mount *mp;
- struct fsmap_head __user *data;
- unsigned int idx;
- __u32 last_flags;
-};
-
-STATIC int
-xfs_getfsmap_format(struct xfs_fsmap *xfm, void *priv)
-{
- struct getfsmap_info *info = priv;
- struct fsmap fm;
-
- trace_xfs_getfsmap_mapping(info->mp, xfm);
-
- info->last_flags = xfm->fmr_flags;
- xfs_fsmap_from_internal(&fm, xfm);
- if (copy_to_user(&info->data->fmh_recs[info->idx++], &fm,
- sizeof(struct fsmap)))
- return -EFAULT;
-
- return 0;
-}
-
STATIC int
xfs_ioc_getfsmap(
struct xfs_inode *ip,
struct fsmap_head __user *arg)
{
- struct getfsmap_info info = { NULL };
struct xfs_fsmap_head xhead = {0};
struct fsmap_head head;
- bool aborted = false;
+ struct fsmap *recs;
+ unsigned int count;
+ __u32 last_flags = 0;
+ bool done = false;
int error;
if (copy_from_user(&head, arg, sizeof(struct fsmap_head)))
@@ -1760,38 +1738,112 @@ xfs_ioc_getfsmap(
sizeof(head.fmh_keys[1].fmr_reserved)))
return -EINVAL;
+ /*
+ * Use an internal memory buffer so that we don't have to copy fsmap
+ * data to userspace while holding locks. Start by trying to allocate
+ * up to 128k for the buffer, but fall back to a single page if needed.
+ */
+ count = min_t(unsigned int, head.fmh_count,
+ 131072 / sizeof(struct fsmap));
+ recs = kvzalloc(count * sizeof(struct fsmap), GFP_KERNEL);
+ if (!recs) {
+ count = min_t(unsigned int, head.fmh_count,
+ PAGE_SIZE / sizeof(struct fsmap));
+ recs = kvzalloc(count * sizeof(struct fsmap), GFP_KERNEL);
+ if (!recs)
+ return -ENOMEM;
+ }
+
xhead.fmh_iflags = head.fmh_iflags;
- xhead.fmh_count = head.fmh_count;
xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]);
xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]);
trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]);
- info.mp = ip->i_mount;
- info.data = arg;
- error = xfs_getfsmap(ip->i_mount, &xhead, xfs_getfsmap_format, &info);
- if (error == -ECANCELED) {
- error = 0;
- aborted = true;
- } else if (error)
- return error;
+ head.fmh_entries = 0;
+ do {
+ struct fsmap __user *user_recs;
+ struct fsmap *last_rec;
+
+ user_recs = &arg->fmh_recs[head.fmh_entries];
+ xhead.fmh_entries = 0;
+ xhead.fmh_count = min_t(unsigned int, count,
+ head.fmh_count - head.fmh_entries);
+
+ /* Run query, record how many entries we got. */
+ error = xfs_getfsmap(ip->i_mount, &xhead, recs);
+ switch (error) {
+ case 0:
+ /*
+ * There are no more records in the result set. Copy
+ * whatever we got to userspace and break out.
+ */
+ done = true;
+ break;
+ case -ECANCELED:
+ /*
+ * The internal memory buffer is full. Copy whatever
+ * records we got to userspace and go again if we have
+ * not yet filled the userspace buffer.
+ */
+ error = 0;
+ break;
+ default:
+ goto out_free;
+ }
+ head.fmh_entries += xhead.fmh_entries;
+ head.fmh_oflags = xhead.fmh_oflags;
- /* If we didn't abort, set the "last" flag in the last fmx */
- if (!aborted && info.idx) {
- info.last_flags |= FMR_OF_LAST;
- if (copy_to_user(&info.data->fmh_recs[info.idx - 1].fmr_flags,
- &info.last_flags, sizeof(info.last_flags)))
- return -EFAULT;
+ /*
+ * If the caller wanted a record count or there aren't any
+ * new records to return, we're done.
+ */
+ if (head.fmh_count == 0 || xhead.fmh_entries == 0)
+ break;
+
+ /* Copy all the records we got out to userspace. */
+ if (copy_to_user(user_recs, recs,
+ xhead.fmh_entries * sizeof(struct fsmap))) {
+ error = -EFAULT;
+ goto out_free;
+ }
+
+ /* Remember the last record flags we copied to userspace. */
+ last_rec = &recs[xhead.fmh_entries - 1];
+ last_flags = last_rec->fmr_flags;
+
+ /* Set up the low key for the next iteration. */
+ xfs_fsmap_to_internal(&xhead.fmh_keys[0], last_rec);
+ trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
+ } while (!done && head.fmh_entries < head.fmh_count);
+
+ /*
+ * If there are no more records in the query result set and we're not
+ * in counting mode, mark the last record returned with the LAST flag.
+ */
+ if (done && head.fmh_count > 0 && head.fmh_entries > 0) {
+ struct fsmap __user *user_rec;
+
+ last_flags |= FMR_OF_LAST;
+ user_rec = &arg->fmh_recs[head.fmh_entries - 1];
+
+ if (copy_to_user(&user_rec->fmr_flags, &last_flags,
+ sizeof(last_flags))) {
+ error = -EFAULT;
+ goto out_free;
+ }
}
/* copy back header */
- head.fmh_entries = xhead.fmh_entries;
- head.fmh_oflags = xhead.fmh_oflags;
- if (copy_to_user(arg, &head, sizeof(struct fsmap_head)))
- return -EFAULT;
+ if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) {
+ error = -EFAULT;
+ goto out_free;
+ }
- return 0;
+out_free:
+ kmem_free(recs);
+ return error;
}
STATIC int
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 80a13c8561d8..5e165456da68 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -237,7 +237,7 @@ xfs_vn_create(
umode_t mode,
bool flags)
{
- return xfs_vn_mknod(dir, dentry, mode, 0);
+ return xfs_generic_create(dir, dentry, mode, 0, false);
}
STATIC int
@@ -246,7 +246,7 @@ xfs_vn_mkdir(
struct dentry *dentry,
umode_t mode)
{
- return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0);
+ return xfs_generic_create(dir, dentry, mode | S_IFDIR, 0, false);
}
STATIC struct dentry *
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index ab737fed7b12..ad1009778d33 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -123,7 +123,6 @@ typedef __u32 xfs_nlink_t;
#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */
#define EFSBADCRC EBADMSG /* Bad CRC detected */
-#define SYNCHRONIZE() barrier()
#define __return_address __builtin_return_address(0)
/*
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index ad0c69ee8947..fa2d05e65ff1 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1475,14 +1475,14 @@ xlog_commit_record(
}
/*
- * Push on the buffer cache code if we ever use more than 75% of the on-disk
- * log space. This code pushes on the lsn which would supposedly free up
- * the 25% which we want to leave free. We may need to adopt a policy which
- * pushes on an lsn which is further along in the log once we reach the high
- * water mark. In this manner, we would be creating a low water mark.
+ * Compute the LSN that we'd need to push the log tail towards in order to have
+ * (a) enough on-disk log space to log the number of bytes specified, (b) at
+ * least 25% of the log space free, and (c) at least 256 blocks free. If the
+ * log free space already meets all three thresholds, this function returns
+ * NULLCOMMITLSN.
*/
-STATIC void
-xlog_grant_push_ail(
+xfs_lsn_t
+xlog_grant_push_threshold(
struct xlog *log,
int need_bytes)
{
@@ -1508,7 +1508,7 @@ xlog_grant_push_ail(
free_threshold = max(free_threshold, (log->l_logBBsize >> 2));
free_threshold = max(free_threshold, 256);
if (free_blocks >= free_threshold)
- return;
+ return NULLCOMMITLSN;
xlog_crack_atomic_lsn(&log->l_tail_lsn, &threshold_cycle,
&threshold_block);
@@ -1528,13 +1528,33 @@ xlog_grant_push_ail(
if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0)
threshold_lsn = last_sync_lsn;
+ return threshold_lsn;
+}
+
+/*
+ * Push the tail of the log if we need to do so to maintain the free log space
+ * thresholds set out by xlog_grant_push_threshold. We may need to adopt a
+ * policy which pushes on an lsn which is further along in the log once we
+ * reach the high water mark. In this manner, we would be creating a low water
+ * mark.
+ */
+STATIC void
+xlog_grant_push_ail(
+ struct xlog *log,
+ int need_bytes)
+{
+ xfs_lsn_t threshold_lsn;
+
+ threshold_lsn = xlog_grant_push_threshold(log, need_bytes);
+ if (threshold_lsn == NULLCOMMITLSN || XLOG_FORCED_SHUTDOWN(log))
+ return;
+
/*
* Get the transaction layer to kick the dirty buffers out to
* disk asynchronously. No point in trying to do this if
* the filesystem is shutting down.
*/
- if (!XLOG_FORCED_SHUTDOWN(log))
- xfs_ail_push(log->l_ailp, threshold_lsn);
+ xfs_ail_push(log->l_ailp, threshold_lsn);
}
/*
@@ -1604,9 +1624,7 @@ xlog_cksum(
int i;
int xheads;
- xheads = size / XLOG_HEADER_CYCLE_SIZE;
- if (size % XLOG_HEADER_CYCLE_SIZE)
- xheads++;
+ xheads = DIV_ROUND_UP(size, XLOG_HEADER_CYCLE_SIZE);
for (i = 1; i < xheads; i++) {
crc = crc32c(crc, &xhdr[i].hic_xheader,
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 1412d6993f1e..58c3fcbec94a 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -141,4 +141,6 @@ void xfs_log_quiesce(struct xfs_mount *mp);
bool xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t);
bool xfs_log_in_recovery(struct xfs_mount *);
+xfs_lsn_t xlog_grant_push_threshold(struct xlog *log, int need_bytes);
+
#endif /* __XFS_LOG_H__ */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index a17d788921d6..a8289adc1b29 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -371,6 +371,19 @@ out:
return error;
}
+static inline int
+xlog_logrec_hblks(struct xlog *log, struct xlog_rec_header *rh)
+{
+ if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+ int h_size = be32_to_cpu(rh->h_size);
+
+ if ((be32_to_cpu(rh->h_version) & XLOG_VERSION_2) &&
+ h_size > XLOG_HEADER_CYCLE_SIZE)
+ return DIV_ROUND_UP(h_size, XLOG_HEADER_CYCLE_SIZE);
+ }
+ return 1;
+}
+
/*
* Potentially backup over partial log record write.
*
@@ -463,15 +476,7 @@ xlog_find_verify_log_record(
* reset last_blk. Only when last_blk points in the middle of a log
* record do we update last_blk.
*/
- if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
- uint h_size = be32_to_cpu(head->h_size);
-
- xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE;
- if (h_size % XLOG_HEADER_CYCLE_SIZE)
- xhdrs++;
- } else {
- xhdrs = 1;
- }
+ xhdrs = xlog_logrec_hblks(log, head);
if (*last_blk - i + extra_bblks !=
BTOBB(be32_to_cpu(head->h_len)) + xhdrs)
@@ -1158,22 +1163,7 @@ xlog_check_unmount_rec(
* below. We won't want to clear the unmount record if there is one, so
* we pass the lsn of the unmount record rather than the block after it.
*/
- if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
- int h_size = be32_to_cpu(rhead->h_size);
- int h_version = be32_to_cpu(rhead->h_version);
-
- if ((h_version & XLOG_VERSION_2) &&
- (h_size > XLOG_HEADER_CYCLE_SIZE)) {
- hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
- if (h_size % XLOG_HEADER_CYCLE_SIZE)
- hblks++;
- } else {
- hblks = 1;
- }
- } else {
- hblks = 1;
- }
-
+ hblks = xlog_logrec_hblks(log, rhead);
after_umount_blk = xlog_wrap_logbno(log,
rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len)));
@@ -2444,44 +2434,66 @@ xlog_recover_process_data(
/* Take all the collected deferred ops and finish them in order. */
static int
xlog_finish_defer_ops(
- struct xfs_trans *parent_tp)
+ struct xfs_mount *mp,
+ struct list_head *capture_list)
{
- struct xfs_mount *mp = parent_tp->t_mountp;
+ struct xfs_defer_capture *dfc, *next;
struct xfs_trans *tp;
- int64_t freeblks;
- uint resblks;
- int error;
+ struct xfs_inode *ip;
+ int error = 0;
- /*
- * We're finishing the defer_ops that accumulated as a result of
- * recovering unfinished intent items during log recovery. We
- * reserve an itruncate transaction because it is the largest
- * permanent transaction type. Since we're the only user of the fs
- * right now, take 93% (15/16) of the available free blocks. Use
- * weird math to avoid a 64-bit division.
- */
- freeblks = percpu_counter_sum(&mp->m_fdblocks);
- if (freeblks <= 0)
- return -ENOSPC;
- resblks = min_t(int64_t, UINT_MAX, freeblks);
- resblks = (resblks * 15) >> 4;
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks,
- 0, XFS_TRANS_RESERVE, &tp);
- if (error)
- return error;
- /* transfer all collected dfops to this transaction */
- xfs_defer_move(tp, parent_tp);
+ list_for_each_entry_safe(dfc, next, capture_list, dfc_list) {
+ struct xfs_trans_res resv;
+
+ /*
+ * Create a new transaction reservation from the captured
+ * information. Set logcount to 1 to force the new transaction
+ * to regrant every roll so that we can make forward progress
+ * in recovery no matter how full the log might be.
+ */
+ resv.tr_logres = dfc->dfc_logres;
+ resv.tr_logcount = 1;
+ resv.tr_logflags = XFS_TRANS_PERM_LOG_RES;
+
+ error = xfs_trans_alloc(mp, &resv, dfc->dfc_blkres,
+ dfc->dfc_rtxres, XFS_TRANS_RESERVE, &tp);
+ if (error)
+ return error;
- return xfs_trans_commit(tp);
+ /*
+ * Transfer to this new transaction all the dfops we captured
+ * from recovering a single intent item.
+ */
+ list_del_init(&dfc->dfc_list);
+ xfs_defer_ops_continue(dfc, tp, &ip);
+
+ error = xfs_trans_commit(tp);
+ if (ip) {
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_irele(ip);
+ }
+ if (error)
+ return error;
+ }
+
+ ASSERT(list_empty(capture_list));
+ return 0;
}
-/* Is this log item a deferred action intent? */
-static inline bool xlog_item_is_intent(struct xfs_log_item *lip)
+/* Release all the captured defer ops and capture structures in this list. */
+static void
+xlog_abort_defer_ops(
+ struct xfs_mount *mp,
+ struct list_head *capture_list)
{
- return lip->li_ops->iop_recover != NULL &&
- lip->li_ops->iop_match != NULL;
-}
+ struct xfs_defer_capture *dfc;
+ struct xfs_defer_capture *next;
+ list_for_each_entry_safe(dfc, next, capture_list, dfc_list) {
+ list_del_init(&dfc->dfc_list);
+ xfs_defer_ops_release(mp, dfc);
+ }
+}
/*
* When this is called, all of the log intent items which did not have
* corresponding log done items should be in the AIL. What we do now
@@ -2502,35 +2514,23 @@ STATIC int
xlog_recover_process_intents(
struct xlog *log)
{
- struct xfs_trans *parent_tp;
+ LIST_HEAD(capture_list);
struct xfs_ail_cursor cur;
struct xfs_log_item *lip;
struct xfs_ail *ailp;
- int error;
+ int error = 0;
#if defined(DEBUG) || defined(XFS_WARN)
xfs_lsn_t last_lsn;
#endif
- /*
- * The intent recovery handlers commit transactions to complete recovery
- * for individual intents, but any new deferred operations that are
- * queued during that process are held off until the very end. The
- * purpose of this transaction is to serve as a container for deferred
- * operations. Each intent recovery handler must transfer dfops here
- * before its local transaction commits, and we'll finish the entire
- * list below.
- */
- error = xfs_trans_alloc_empty(log->l_mp, &parent_tp);
- if (error)
- return error;
-
ailp = log->l_ailp;
spin_lock(&ailp->ail_lock);
- lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
#if defined(DEBUG) || defined(XFS_WARN)
last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);
#endif
- while (lip != NULL) {
+ for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
+ lip != NULL;
+ lip = xfs_trans_ail_cursor_next(ailp, &cur)) {
/*
* We're done when we see something other than an intent.
* There should be no intents left in the AIL now.
@@ -2552,26 +2552,29 @@ xlog_recover_process_intents(
/*
* NOTE: If your intent processing routine can create more
- * deferred ops, you /must/ attach them to the transaction in
- * this routine or else those subsequent intents will get
+ * deferred ops, you /must/ attach them to the capture list in
+ * the recover routine or else those subsequent intents will be
* replayed in the wrong order!
*/
- if (!test_and_set_bit(XFS_LI_RECOVERED, &lip->li_flags)) {
- spin_unlock(&ailp->ail_lock);
- error = lip->li_ops->iop_recover(lip, parent_tp);
- spin_lock(&ailp->ail_lock);
- }
+ spin_unlock(&ailp->ail_lock);
+ error = lip->li_ops->iop_recover(lip, &capture_list);
+ spin_lock(&ailp->ail_lock);
if (error)
- goto out;
- lip = xfs_trans_ail_cursor_next(ailp, &cur);
+ break;
}
-out:
+
xfs_trans_ail_cursor_done(&cur);
spin_unlock(&ailp->ail_lock);
- if (!error)
- error = xlog_finish_defer_ops(parent_tp);
- xfs_trans_cancel(parent_tp);
+ if (error)
+ goto err;
+
+ error = xlog_finish_defer_ops(log->l_mp, &capture_list);
+ if (error)
+ goto err;
+ return 0;
+err:
+ xlog_abort_defer_ops(log->l_mp, &capture_list);
return error;
}
@@ -2878,7 +2881,8 @@ STATIC int
xlog_valid_rec_header(
struct xlog *log,
struct xlog_rec_header *rhead,
- xfs_daddr_t blkno)
+ xfs_daddr_t blkno,
+ int bufsize)
{
int hlen;
@@ -2894,10 +2898,14 @@ xlog_valid_rec_header(
return -EFSCORRUPTED;
}
- /* LR body must have data or it wouldn't have been written */
+ /*
+ * LR body must have data (or it wouldn't have been written)
+ * and h_len must not be greater than LR buffer size.
+ */
hlen = be32_to_cpu(rhead->h_len);
- if (XFS_IS_CORRUPT(log->l_mp, hlen <= 0 || hlen > INT_MAX))
+ if (XFS_IS_CORRUPT(log->l_mp, hlen <= 0 || hlen > bufsize))
return -EFSCORRUPTED;
+
if (XFS_IS_CORRUPT(log->l_mp,
blkno > log->l_logBBsize || blkno > INT_MAX))
return -EFSCORRUPTED;
@@ -2958,9 +2966,6 @@ xlog_do_recovery_pass(
goto bread_err1;
rhead = (xlog_rec_header_t *)offset;
- error = xlog_valid_rec_header(log, rhead, tail_blk);
- if (error)
- goto bread_err1;
/*
* xfsprogs has a bug where record length is based on lsunit but
@@ -2975,30 +2980,22 @@ xlog_do_recovery_pass(
*/
h_size = be32_to_cpu(rhead->h_size);
h_len = be32_to_cpu(rhead->h_len);
- if (h_len > h_size) {
- if (h_len <= log->l_mp->m_logbsize &&
- be32_to_cpu(rhead->h_num_logops) == 1) {
- xfs_warn(log->l_mp,
+ if (h_len > h_size && h_len <= log->l_mp->m_logbsize &&
+ rhead->h_num_logops == cpu_to_be32(1)) {
+ xfs_warn(log->l_mp,
"invalid iclog size (%d bytes), using lsunit (%d bytes)",
- h_size, log->l_mp->m_logbsize);
- h_size = log->l_mp->m_logbsize;
- } else {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW,
- log->l_mp);
- error = -EFSCORRUPTED;
- goto bread_err1;
- }
+ h_size, log->l_mp->m_logbsize);
+ h_size = log->l_mp->m_logbsize;
}
- if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) &&
- (h_size > XLOG_HEADER_CYCLE_SIZE)) {
- hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
- if (h_size % XLOG_HEADER_CYCLE_SIZE)
- hblks++;
+ error = xlog_valid_rec_header(log, rhead, tail_blk, h_size);
+ if (error)
+ goto bread_err1;
+
+ hblks = xlog_logrec_hblks(log, rhead);
+ if (hblks != 1) {
kmem_free(hbp);
hbp = xlog_alloc_buffer(log, hblks);
- } else {
- hblks = 1;
}
} else {
ASSERT(log->l_sectBBsize == 1);
@@ -3070,7 +3067,7 @@ xlog_do_recovery_pass(
}
rhead = (xlog_rec_header_t *)offset;
error = xlog_valid_rec_header(log, rhead,
- split_hblks ? blk_no : 0);
+ split_hblks ? blk_no : 0, h_size);
if (error)
goto bread_err2;
@@ -3151,7 +3148,7 @@ xlog_do_recovery_pass(
goto bread_err2;
rhead = (xlog_rec_header_t *)offset;
- error = xlog_valid_rec_header(log, rhead, blk_no);
+ error = xlog_valid_rec_header(log, rhead, blk_no, h_size);
if (error)
goto bread_err2;
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 3f82e0c92c2d..b2a9abee8b2b 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -249,7 +249,6 @@ xfs_qm_unmount_quotas(
STATIC int
xfs_qm_dqattach_one(
struct xfs_inode *ip,
- xfs_dqid_t id,
xfs_dqtype_t type,
bool doalloc,
struct xfs_dquot **IO_idqpp)
@@ -330,23 +329,23 @@ xfs_qm_dqattach_locked(
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
if (XFS_IS_UQUOTA_ON(mp) && !ip->i_udquot) {
- error = xfs_qm_dqattach_one(ip, i_uid_read(VFS_I(ip)),
- XFS_DQTYPE_USER, doalloc, &ip->i_udquot);
+ error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_USER,
+ doalloc, &ip->i_udquot);
if (error)
goto done;
ASSERT(ip->i_udquot);
}
if (XFS_IS_GQUOTA_ON(mp) && !ip->i_gdquot) {
- error = xfs_qm_dqattach_one(ip, i_gid_read(VFS_I(ip)),
- XFS_DQTYPE_GROUP, doalloc, &ip->i_gdquot);
+ error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_GROUP,
+ doalloc, &ip->i_gdquot);
if (error)
goto done;
ASSERT(ip->i_gdquot);
}
if (XFS_IS_PQUOTA_ON(mp) && !ip->i_pdquot) {
- error = xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQTYPE_PROJ,
+ error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_PROJ,
doalloc, &ip->i_pdquot);
if (error)
goto done;
@@ -1663,6 +1662,7 @@ xfs_qm_vop_dqalloc(
}
if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
+ ASSERT(O_udqpp);
if (!uid_eq(inode->i_uid, uid)) {
/*
* What we need is the dquot that has this uid, and
@@ -1696,6 +1696,7 @@ xfs_qm_vop_dqalloc(
}
}
if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
+ ASSERT(O_gdqpp);
if (!gid_eq(inode->i_gid, gid)) {
xfs_iunlock(ip, lockflags);
error = xfs_qm_dqget(mp, from_kgid(user_ns, gid),
@@ -1713,9 +1714,10 @@ xfs_qm_vop_dqalloc(
}
}
if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
+ ASSERT(O_pdqpp);
if (ip->i_d.di_projid != prid) {
xfs_iunlock(ip, lockflags);
- error = xfs_qm_dqget(mp, (xfs_dqid_t)prid,
+ error = xfs_qm_dqget(mp, prid,
XFS_DQTYPE_PROJ, true, &pq);
if (error) {
ASSERT(error != -ENOENT);
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index ca93b6488377..7529eb63ce94 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -424,7 +424,7 @@ const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
STATIC int
xfs_cui_item_recover(
struct xfs_log_item *lip,
- struct xfs_trans *parent_tp)
+ struct list_head *capture_list)
{
struct xfs_bmbt_irec irec;
struct xfs_cui_log_item *cuip = CUI_ITEM(lip);
@@ -432,7 +432,7 @@ xfs_cui_item_recover(
struct xfs_cud_log_item *cudp;
struct xfs_trans *tp;
struct xfs_btree_cur *rcur = NULL;
- struct xfs_mount *mp = parent_tp->t_mountp;
+ struct xfs_mount *mp = lip->li_mountp;
xfs_fsblock_t startblock_fsb;
xfs_fsblock_t new_fsb;
xfs_extlen_t new_len;
@@ -467,14 +467,8 @@ xfs_cui_item_recover(
refc->pe_len == 0 ||
startblock_fsb >= mp->m_sb.sb_dblocks ||
refc->pe_len >= mp->m_sb.sb_agblocks ||
- (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)) {
- /*
- * This will pull the CUI from the AIL and
- * free the memory associated with it.
- */
- xfs_cui_release(cuip);
+ (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS))
return -EFSCORRUPTED;
- }
}
/*
@@ -493,12 +487,7 @@ xfs_cui_item_recover(
mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp);
if (error)
return error;
- /*
- * Recovery stashes all deferred ops during intent processing and
- * finishes them on completion. Transfer current dfops state to this
- * transaction and transfer the result back before we return.
- */
- xfs_defer_move(tp, parent_tp);
+
cudp = xfs_trans_get_cud(tp, cuip);
for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
@@ -555,13 +544,10 @@ xfs_cui_item_recover(
}
xfs_refcount_finish_one_cleanup(tp, rcur, error);
- xfs_defer_move(parent_tp, tp);
- error = xfs_trans_commit(tp);
- return error;
+ return xfs_defer_ops_capture_and_commit(tp, NULL, capture_list);
abort_error:
xfs_refcount_finish_one_cleanup(tp, rcur, error);
- xfs_defer_move(parent_tp, tp);
xfs_trans_cancel(tp);
return error;
}
@@ -574,6 +560,32 @@ xfs_cui_item_match(
return CUI_ITEM(lip)->cui_format.cui_id == intent_id;
}
+/* Relog an intent item to push the log tail forward. */
+static struct xfs_log_item *
+xfs_cui_item_relog(
+ struct xfs_log_item *intent,
+ struct xfs_trans *tp)
+{
+ struct xfs_cud_log_item *cudp;
+ struct xfs_cui_log_item *cuip;
+ struct xfs_phys_extent *extp;
+ unsigned int count;
+
+ count = CUI_ITEM(intent)->cui_format.cui_nextents;
+ extp = CUI_ITEM(intent)->cui_format.cui_extents;
+
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ cudp = xfs_trans_get_cud(tp, CUI_ITEM(intent));
+ set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags);
+
+ cuip = xfs_cui_init(tp->t_mountp, count);
+ memcpy(cuip->cui_format.cui_extents, extp, count * sizeof(*extp));
+ atomic_set(&cuip->cui_next_extent, count);
+ xfs_trans_add_item(tp, &cuip->cui_item);
+ set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags);
+ return &cuip->cui_item;
+}
+
static const struct xfs_item_ops xfs_cui_item_ops = {
.iop_size = xfs_cui_item_size,
.iop_format = xfs_cui_item_format,
@@ -581,6 +593,7 @@ static const struct xfs_item_ops xfs_cui_item_ops = {
.iop_release = xfs_cui_item_release,
.iop_recover = xfs_cui_item_recover,
.iop_match = xfs_cui_item_match,
+ .iop_relog = xfs_cui_item_relog,
};
/*
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index dc5b0753cd51..7adc996ca6e3 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -467,14 +467,14 @@ const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
STATIC int
xfs_rui_item_recover(
struct xfs_log_item *lip,
- struct xfs_trans *parent_tp)
+ struct list_head *capture_list)
{
struct xfs_rui_log_item *ruip = RUI_ITEM(lip);
struct xfs_map_extent *rmap;
struct xfs_rud_log_item *rudp;
struct xfs_trans *tp;
struct xfs_btree_cur *rcur = NULL;
- struct xfs_mount *mp = parent_tp->t_mountp;
+ struct xfs_mount *mp = lip->li_mountp;
xfs_fsblock_t startblock_fsb;
enum xfs_rmap_intent_type type;
xfs_exntst_t state;
@@ -511,14 +511,8 @@ xfs_rui_item_recover(
rmap->me_len == 0 ||
startblock_fsb >= mp->m_sb.sb_dblocks ||
rmap->me_len >= mp->m_sb.sb_agblocks ||
- (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS)) {
- /*
- * This will pull the RUI from the AIL and
- * free the memory associated with it.
- */
- xfs_rui_release(ruip);
+ (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS))
return -EFSCORRUPTED;
- }
}
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
@@ -573,8 +567,7 @@ xfs_rui_item_recover(
}
xfs_rmap_finish_one_cleanup(tp, rcur, error);
- error = xfs_trans_commit(tp);
- return error;
+ return xfs_defer_ops_capture_and_commit(tp, NULL, capture_list);
abort_error:
xfs_rmap_finish_one_cleanup(tp, rcur, error);
@@ -590,6 +583,32 @@ xfs_rui_item_match(
return RUI_ITEM(lip)->rui_format.rui_id == intent_id;
}
+/* Relog an intent item to push the log tail forward. */
+static struct xfs_log_item *
+xfs_rui_item_relog(
+ struct xfs_log_item *intent,
+ struct xfs_trans *tp)
+{
+ struct xfs_rud_log_item *rudp;
+ struct xfs_rui_log_item *ruip;
+ struct xfs_map_extent *extp;
+ unsigned int count;
+
+ count = RUI_ITEM(intent)->rui_format.rui_nextents;
+ extp = RUI_ITEM(intent)->rui_format.rui_extents;
+
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ rudp = xfs_trans_get_rud(tp, RUI_ITEM(intent));
+ set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags);
+
+ ruip = xfs_rui_init(tp->t_mountp, count);
+ memcpy(ruip->rui_format.rui_extents, extp, count * sizeof(*extp));
+ atomic_set(&ruip->rui_next_extent, count);
+ xfs_trans_add_item(tp, &ruip->rui_item);
+ set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags);
+ return &ruip->rui_item;
+}
+
static const struct xfs_item_ops xfs_rui_item_ops = {
.iop_size = xfs_rui_item_size,
.iop_format = xfs_rui_item_format,
@@ -597,6 +616,7 @@ static const struct xfs_item_ops xfs_rui_item_ops = {
.iop_release = xfs_rui_item_release,
.iop_recover = xfs_rui_item_recover,
.iop_match = xfs_rui_item_match,
+ .iop_relog = xfs_rui_item_relog,
};
/*
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 5b89c12f1566..ede1baf31413 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -18,7 +18,7 @@
#include "xfs_trans_space.h"
#include "xfs_icache.h"
#include "xfs_rtalloc.h"
-
+#include "xfs_sb.h"
/*
* Read and return the summary information for a given extent size,
@@ -778,8 +778,14 @@ xfs_growfs_rt_alloc(
struct xfs_bmbt_irec map; /* block map output */
int nmap; /* number of block maps */
int resblks; /* space reservation */
+ enum xfs_blft buf_type;
struct xfs_trans *tp;
+ if (ip == mp->m_rsumip)
+ buf_type = XFS_BLFT_RTSUMMARY_BUF;
+ else
+ buf_type = XFS_BLFT_RTBITMAP_BUF;
+
/*
* Allocate space to the file, as necessary.
*/
@@ -841,6 +847,9 @@ xfs_growfs_rt_alloc(
mp->m_bsize, 0, &bp);
if (error)
goto out_trans_cancel;
+
+ xfs_trans_buf_set_type(tp, bp, buf_type);
+ bp->b_ops = &xfs_rtbuf_ops;
memset(bp->b_addr, 0, mp->m_sb.sb_blocksize);
xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
/*
@@ -1015,23 +1024,29 @@ xfs_growfs_rt(
/*
* Lock out other callers by grabbing the bitmap inode lock.
*/
- xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
+ xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP);
xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
/*
- * Update the bitmap inode's size.
+ * Update the bitmap inode's size ondisk and incore. We need
+ * to update the incore size so that inode inactivation won't
+ * punch what it thinks are "posteof" blocks.
*/
mp->m_rbmip->i_d.di_size =
nsbp->sb_rbmblocks * nsbp->sb_blocksize;
+ i_size_write(VFS_I(mp->m_rbmip), mp->m_rbmip->i_d.di_size);
xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
/*
* Get the summary inode into the transaction.
*/
- xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL);
+ xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM);
xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
/*
- * Update the summary inode's size.
+ * Update the summary inode's size. We need to update the
+ * incore size so that inode inactivation won't punch what it
+ * thinks are "posteof" blocks.
*/
mp->m_rsumip->i_d.di_size = nmp->m_rsumsize;
+ i_size_write(VFS_I(mp->m_rsumip), mp->m_rsumip->i_d.di_size);
xfs_trans_log_inode(tp, mp->m_rsumip, XFS_ILOG_CORE);
/*
* Copy summary data from old to new sizes.
@@ -1087,7 +1102,13 @@ error_cancel:
if (error)
break;
}
+ if (error)
+ goto out_free;
+
+ /* Update secondary superblocks now the physical grow has completed */
+ error = xfs_update_secondary_sbs(mp);
+out_free:
/*
* Free the fake mp structure.
*/
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index f70f1255220b..20e0534a772c 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -23,6 +23,7 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
uint64_t xs_xstrat_bytes = 0;
uint64_t xs_write_bytes = 0;
uint64_t xs_read_bytes = 0;
+ uint64_t defer_relog = 0;
static const struct xstats_entry {
char *desc;
@@ -70,10 +71,13 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
xs_xstrat_bytes += per_cpu_ptr(stats, i)->s.xs_xstrat_bytes;
xs_write_bytes += per_cpu_ptr(stats, i)->s.xs_write_bytes;
xs_read_bytes += per_cpu_ptr(stats, i)->s.xs_read_bytes;
+ defer_relog += per_cpu_ptr(stats, i)->s.defer_relog;
}
len += scnprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n",
xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
+ len += scnprintf(buf + len, PATH_MAX-len, "defer_relog %llu\n",
+ defer_relog);
len += scnprintf(buf + len, PATH_MAX-len, "debug %u\n",
#if defined(DEBUG)
1);
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index 34d704f703d2..43ffba74f045 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -137,6 +137,7 @@ struct __xfsstats {
uint64_t xs_xstrat_bytes;
uint64_t xs_write_bytes;
uint64_t xs_read_bytes;
+ uint64_t defer_relog;
};
#define xfsstats_offset(f) (offsetof(struct __xfsstats, f)/sizeof(uint32_t))
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index baf5de30eebb..d1b5f2d2a245 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1234,25 +1234,12 @@ xfs_fc_parse_param(
case Opt_nouuid:
mp->m_flags |= XFS_MOUNT_NOUUID;
return 0;
- case Opt_ikeep:
- mp->m_flags |= XFS_MOUNT_IKEEP;
- return 0;
- case Opt_noikeep:
- mp->m_flags &= ~XFS_MOUNT_IKEEP;
- return 0;
case Opt_largeio:
mp->m_flags |= XFS_MOUNT_LARGEIO;
return 0;
case Opt_nolargeio:
mp->m_flags &= ~XFS_MOUNT_LARGEIO;
return 0;
- case Opt_attr2:
- mp->m_flags |= XFS_MOUNT_ATTR2;
- return 0;
- case Opt_noattr2:
- mp->m_flags &= ~XFS_MOUNT_ATTR2;
- mp->m_flags |= XFS_MOUNT_NOATTR2;
- return 0;
case Opt_filestreams:
mp->m_flags |= XFS_MOUNT_FILESTREAMS;
return 0;
@@ -1304,6 +1291,24 @@ xfs_fc_parse_param(
xfs_mount_set_dax_mode(mp, result.uint_32);
return 0;
#endif
+ /* Following mount options will be removed in September 2025 */
+ case Opt_ikeep:
+ xfs_warn(mp, "%s mount option is deprecated.", param->key);
+ mp->m_flags |= XFS_MOUNT_IKEEP;
+ return 0;
+ case Opt_noikeep:
+ xfs_warn(mp, "%s mount option is deprecated.", param->key);
+ mp->m_flags &= ~XFS_MOUNT_IKEEP;
+ return 0;
+ case Opt_attr2:
+ xfs_warn(mp, "%s mount option is deprecated.", param->key);
+ mp->m_flags |= XFS_MOUNT_ATTR2;
+ return 0;
+ case Opt_noattr2:
+ xfs_warn(mp, "%s mount option is deprecated.", param->key);
+ mp->m_flags &= ~XFS_MOUNT_ATTR2;
+ mp->m_flags |= XFS_MOUNT_NOATTR2;
+ return 0;
default:
xfs_warn(mp, "unknown mount option [%s].", param->key);
return -EINVAL;
@@ -1450,6 +1455,19 @@ xfs_fc_fill_super(
if (error)
goto out_free_sb;
+ /* V4 support is undergoing deprecation. */
+ if (!xfs_sb_version_hascrc(&mp->m_sb)) {
+#ifdef CONFIG_XFS_SUPPORT_V4
+ xfs_warn_once(mp,
+ "Deprecated V4 format (crc=0) will not be supported after September 2030.");
+#else
+ xfs_warn(mp,
+ "Deprecated V4 format (crc=0) not supported by kernel.");
+ error = -EINVAL;
+ goto out_free_sb;
+#endif
+ }
+
/*
* XFS block mappings use 54 bits to store the logical block offset.
* This should suffice to handle the maximum file size that the VFS
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c
index 021ef96d0542..fac9de7ee6d0 100644
--- a/fs/xfs/xfs_sysctl.c
+++ b/fs/xfs/xfs_sysctl.c
@@ -50,13 +50,45 @@ xfs_panic_mask_proc_handler(
}
#endif /* CONFIG_PROC_FS */
+STATIC int
+xfs_deprecate_irix_sgid_inherit_proc_handler(
+ struct ctl_table *ctl,
+ int write,
+ void *buffer,
+ size_t *lenp,
+ loff_t *ppos)
+{
+ if (write) {
+ printk_once(KERN_WARNING
+ "XFS: " "%s sysctl option is deprecated.\n",
+ ctl->procname);
+ }
+ return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
+}
+
+STATIC int
+xfs_deprecate_irix_symlink_mode_proc_handler(
+ struct ctl_table *ctl,
+ int write,
+ void *buffer,
+ size_t *lenp,
+ loff_t *ppos)
+{
+ if (write) {
+ printk_once(KERN_WARNING
+ "XFS: " "%s sysctl option is deprecated.\n",
+ ctl->procname);
+ }
+ return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
+}
+
static struct ctl_table xfs_table[] = {
{
.procname = "irix_sgid_inherit",
.data = &xfs_params.sgid_inherit.val,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = xfs_deprecate_irix_sgid_inherit_proc_handler,
.extra1 = &xfs_params.sgid_inherit.min,
.extra2 = &xfs_params.sgid_inherit.max
},
@@ -65,7 +97,7 @@ static struct ctl_table xfs_table[] = {
.data = &xfs_params.symlink_mode.val,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = xfs_deprecate_irix_symlink_mode_proc_handler,
.extra1 = &xfs_params.symlink_mode.min,
.extra2 = &xfs_params.symlink_mode.max
},
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index dcdcf99cfa5d..86951652d3ed 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -2533,6 +2533,7 @@ DEFINE_DEFER_PENDING_EVENT(xfs_defer_create_intent);
DEFINE_DEFER_PENDING_EVENT(xfs_defer_cancel_list);
DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_finish);
DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_abort);
+DEFINE_DEFER_PENDING_EVENT(xfs_defer_relog_intent);
#define DEFINE_BMAP_FREE_DEFERRED_EVENT DEFINE_PHYS_EXTENT_DEFERRED_EVENT
DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_bmap_free_defer);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index ca18a040336a..c94e71f741b6 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -959,7 +959,7 @@ xfs_trans_cancel(
struct xfs_log_item *lip;
list_for_each_entry(lip, &tp->t_items, li_trans)
- ASSERT(!(lip->li_type == XFS_LI_EFD));
+ ASSERT(!xlog_item_is_intent_done(lip));
}
#endif
xfs_trans_unreserve_and_mod_sb(tp);
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index f46534b75236..084658946cc8 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -55,14 +55,12 @@ struct xfs_log_item {
#define XFS_LI_ABORTED 1
#define XFS_LI_FAILED 2
#define XFS_LI_DIRTY 3 /* log item dirty in transaction */
-#define XFS_LI_RECOVERED 4 /* log intent item has been recovered */
#define XFS_LI_FLAGS \
{ (1 << XFS_LI_IN_AIL), "IN_AIL" }, \
{ (1 << XFS_LI_ABORTED), "ABORTED" }, \
{ (1 << XFS_LI_FAILED), "FAILED" }, \
- { (1 << XFS_LI_DIRTY), "DIRTY" }, \
- { (1 << XFS_LI_RECOVERED), "RECOVERED" }
+ { (1 << XFS_LI_DIRTY), "DIRTY" }
struct xfs_item_ops {
unsigned flags;
@@ -74,10 +72,29 @@ struct xfs_item_ops {
void (*iop_committing)(struct xfs_log_item *, xfs_lsn_t commit_lsn);
void (*iop_release)(struct xfs_log_item *);
xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t);
- int (*iop_recover)(struct xfs_log_item *lip, struct xfs_trans *tp);
+ int (*iop_recover)(struct xfs_log_item *lip,
+ struct list_head *capture_list);
bool (*iop_match)(struct xfs_log_item *item, uint64_t id);
+ struct xfs_log_item *(*iop_relog)(struct xfs_log_item *intent,
+ struct xfs_trans *tp);
};
+/* Is this log item a deferred action intent? */
+static inline bool
+xlog_item_is_intent(struct xfs_log_item *lip)
+{
+ return lip->li_ops->iop_recover != NULL &&
+ lip->li_ops->iop_match != NULL;
+}
+
+/* Is this a log intent-done item? */
+static inline bool
+xlog_item_is_intent_done(struct xfs_log_item *lip)
+{
+ return lip->li_ops->iop_unpin == NULL &&
+ lip->li_ops->iop_push == NULL;
+}
+
/*
* Release the log item as soon as committed. This is for items just logging
* intents that never need to be written back in place.
@@ -243,4 +260,12 @@ void xfs_trans_buf_copy_type(struct xfs_buf *dst_bp,
extern kmem_zone_t *xfs_trans_zone;
+static inline struct xfs_log_item *
+xfs_trans_item_relog(
+ struct xfs_log_item *lip,
+ struct xfs_trans *tp)
+{
+ return lip->li_ops->iop_relog(lip, tp);
+}
+
#endif /* __XFS_TRANS_H__ */
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 133fc6fc3edd..fe45b0c3970c 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -221,36 +221,27 @@ xfs_trans_mod_dquot(
}
switch (field) {
-
- /*
- * regular disk blk reservation
- */
- case XFS_TRANS_DQ_RES_BLKS:
+ /* regular disk blk reservation */
+ case XFS_TRANS_DQ_RES_BLKS:
qtrx->qt_blk_res += delta;
break;
- /*
- * inode reservation
- */
- case XFS_TRANS_DQ_RES_INOS:
+ /* inode reservation */
+ case XFS_TRANS_DQ_RES_INOS:
qtrx->qt_ino_res += delta;
break;
- /*
- * disk blocks used.
- */
- case XFS_TRANS_DQ_BCOUNT:
+ /* disk blocks used. */
+ case XFS_TRANS_DQ_BCOUNT:
qtrx->qt_bcount_delta += delta;
break;
- case XFS_TRANS_DQ_DELBCOUNT:
+ case XFS_TRANS_DQ_DELBCOUNT:
qtrx->qt_delbcnt_delta += delta;
break;
- /*
- * Inode Count
- */
- case XFS_TRANS_DQ_ICOUNT:
+ /* Inode Count */
+ case XFS_TRANS_DQ_ICOUNT:
if (qtrx->qt_ino_res && delta > 0) {
qtrx->qt_ino_res_used += delta;
ASSERT(qtrx->qt_ino_res >= qtrx->qt_ino_res_used);
@@ -258,17 +249,13 @@ xfs_trans_mod_dquot(
qtrx->qt_icount_delta += delta;
break;
- /*
- * rtblk reservation
- */
- case XFS_TRANS_DQ_RES_RTBLKS:
+ /* rtblk reservation */
+ case XFS_TRANS_DQ_RES_RTBLKS:
qtrx->qt_rtblk_res += delta;
break;
- /*
- * rtblk count
- */
- case XFS_TRANS_DQ_RTBCOUNT:
+ /* rtblk count */
+ case XFS_TRANS_DQ_RTBCOUNT:
if (qtrx->qt_rtblk_res && delta > 0) {
qtrx->qt_rtblk_res_used += delta;
ASSERT(qtrx->qt_rtblk_res >= qtrx->qt_rtblk_res_used);
@@ -276,11 +263,11 @@ xfs_trans_mod_dquot(
qtrx->qt_rtbcount_delta += delta;
break;
- case XFS_TRANS_DQ_DELRTBCOUNT:
+ case XFS_TRANS_DQ_DELRTBCOUNT:
qtrx->qt_delrtb_delta += delta;
break;
- default:
+ default:
ASSERT(0);
}
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 8ec7c8f109d7..64cc2a9c38c8 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -24,6 +24,39 @@
#include "zonefs.h"
+static inline int zonefs_zone_mgmt(struct inode *inode,
+ enum req_opf op)
+{
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ int ret;
+
+ lockdep_assert_held(&zi->i_truncate_mutex);
+
+ ret = blkdev_zone_mgmt(inode->i_sb->s_bdev, op, zi->i_zsector,
+ zi->i_zone_size >> SECTOR_SHIFT, GFP_NOFS);
+ if (ret) {
+ zonefs_err(inode->i_sb,
+ "Zone management operation %s at %llu failed %d\n",
+ blk_op_str(op), zi->i_zsector, ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static inline void zonefs_i_size_write(struct inode *inode, loff_t isize)
+{
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+
+ i_size_write(inode, isize);
+ /*
+ * A full zone is no longer open/active and does not need
+ * explicit closing.
+ */
+ if (isize >= zi->i_max_size)
+ zi->i_flags &= ~ZONEFS_ZONE_OPEN;
+}
+
static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
unsigned int flags, struct iomap *iomap,
struct iomap *srcmap)
@@ -302,6 +335,17 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
}
/*
+ * If the filesystem is mounted with the explicit-open mount option, we
+ * need to clear the ZONEFS_ZONE_OPEN flag if the zone transitioned to
+ * the read-only or offline condition, to avoid attempting an explicit
+ * close of the zone when the inode file is closed.
+ */
+ if ((sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) &&
+ (zone->cond == BLK_ZONE_COND_OFFLINE ||
+ zone->cond == BLK_ZONE_COND_READONLY))
+ zi->i_flags &= ~ZONEFS_ZONE_OPEN;
+
+ /*
* If error=remount-ro was specified, any error result in remounting
* the volume as read-only.
*/
@@ -315,7 +359,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
* invalid data.
*/
zonefs_update_stats(inode, data_size);
- i_size_write(inode, data_size);
+ zonefs_i_size_write(inode, data_size);
zi->i_wpoffset = data_size;
return 0;
@@ -328,7 +372,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
* eventually correct the file size and zonefs inode write pointer offset
* (which can be out of sync with the drive due to partial write failures).
*/
-static void zonefs_io_error(struct inode *inode, bool write)
+static void __zonefs_io_error(struct inode *inode, bool write)
{
struct zonefs_inode_info *zi = ZONEFS_I(inode);
struct super_block *sb = inode->i_sb;
@@ -342,8 +386,6 @@ static void zonefs_io_error(struct inode *inode, bool write)
};
int ret;
- mutex_lock(&zi->i_truncate_mutex);
-
/*
* Memory allocations in blkdev_report_zones() can trigger a memory
* reclaim which may in turn cause a recursion into zonefs as well as
@@ -359,7 +401,14 @@ static void zonefs_io_error(struct inode *inode, bool write)
zonefs_err(sb, "Get inode %lu zone information failed %d\n",
inode->i_ino, ret);
memalloc_noio_restore(noio_flag);
+}
+static void zonefs_io_error(struct inode *inode, bool write)
+{
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+
+ mutex_lock(&zi->i_truncate_mutex);
+ __zonefs_io_error(inode, write);
mutex_unlock(&zi->i_truncate_mutex);
}
@@ -397,13 +446,27 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize)
if (isize == old_isize)
goto unlock;
- ret = blkdev_zone_mgmt(inode->i_sb->s_bdev, op, zi->i_zsector,
- zi->i_zone_size >> SECTOR_SHIFT, GFP_NOFS);
- if (ret) {
- zonefs_err(inode->i_sb,
- "Zone management operation at %llu failed %d",
- zi->i_zsector, ret);
+ ret = zonefs_zone_mgmt(inode, op);
+ if (ret)
goto unlock;
+
+ /*
+ * If the mount option ZONEFS_MNTOPT_EXPLICIT_OPEN is set,
+ * take care of open zones.
+ */
+ if (zi->i_flags & ZONEFS_ZONE_OPEN) {
+ /*
+ * Truncating a zone to EMPTY or FULL is the equivalent of
+ * closing the zone. For a truncation to 0, we need to
+ * re-open the zone to ensure new writes can be processed.
+ * For a truncation to the maximum file size, the zone is
+ * closed and writes cannot be accepted anymore, so clear
+ * the open flag.
+ */
+ if (!isize)
+ ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN);
+ else
+ zi->i_flags &= ~ZONEFS_ZONE_OPEN;
}
zonefs_update_stats(inode, isize);
@@ -584,7 +647,7 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
mutex_lock(&zi->i_truncate_mutex);
if (i_size_read(inode) < iocb->ki_pos + size) {
zonefs_update_stats(inode, iocb->ki_pos + size);
- i_size_write(inode, iocb->ki_pos + size);
+ zonefs_i_size_write(inode, iocb->ki_pos + size);
}
mutex_unlock(&zi->i_truncate_mutex);
}
@@ -865,8 +928,128 @@ inode_unlock:
return ret;
}
+static inline bool zonefs_file_use_exp_open(struct inode *inode, struct file *file)
+{
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
+
+ if (!(sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN))
+ return false;
+
+ if (zi->i_ztype != ZONEFS_ZTYPE_SEQ)
+ return false;
+
+ if (!(file->f_mode & FMODE_WRITE))
+ return false;
+
+ return true;
+}
+
+static int zonefs_open_zone(struct inode *inode)
+{
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
+ int ret = 0;
+
+ mutex_lock(&zi->i_truncate_mutex);
+
+ zi->i_wr_refcnt++;
+ if (zi->i_wr_refcnt == 1) {
+
+ if (atomic_inc_return(&sbi->s_open_zones) > sbi->s_max_open_zones) {
+ atomic_dec(&sbi->s_open_zones);
+ ret = -EBUSY;
+ goto unlock;
+ }
+
+ if (i_size_read(inode) < zi->i_max_size) {
+ ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN);
+ if (ret) {
+ zi->i_wr_refcnt--;
+ atomic_dec(&sbi->s_open_zones);
+ goto unlock;
+ }
+ zi->i_flags |= ZONEFS_ZONE_OPEN;
+ }
+ }
+
+unlock:
+ mutex_unlock(&zi->i_truncate_mutex);
+
+ return ret;
+}
+
+static int zonefs_file_open(struct inode *inode, struct file *file)
+{
+ int ret;
+
+ ret = generic_file_open(inode, file);
+ if (ret)
+ return ret;
+
+ if (zonefs_file_use_exp_open(inode, file))
+ return zonefs_open_zone(inode);
+
+ return 0;
+}
+
+static void zonefs_close_zone(struct inode *inode)
+{
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ int ret = 0;
+
+ mutex_lock(&zi->i_truncate_mutex);
+ zi->i_wr_refcnt--;
+ if (!zi->i_wr_refcnt) {
+ struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
+ struct super_block *sb = inode->i_sb;
+
+ /*
+ * If the file zone is full, it is not open anymore and we only
+ * need to decrement the open count.
+ */
+ if (!(zi->i_flags & ZONEFS_ZONE_OPEN))
+ goto dec;
+
+ ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE);
+ if (ret) {
+ __zonefs_io_error(inode, false);
+ /*
+ * Leaving zones explicitly open may lead to a state
+ * where most zones cannot be written (zone resources
+ * exhausted). So take preventive action by remounting
+ * read-only.
+ */
+ if (zi->i_flags & ZONEFS_ZONE_OPEN &&
+ !(sb->s_flags & SB_RDONLY)) {
+ zonefs_warn(sb, "closing zone failed, remounting filesystem read-only\n");
+ sb->s_flags |= SB_RDONLY;
+ }
+ }
+ zi->i_flags &= ~ZONEFS_ZONE_OPEN;
+dec:
+ atomic_dec(&sbi->s_open_zones);
+ }
+ mutex_unlock(&zi->i_truncate_mutex);
+}
+
+static int zonefs_file_release(struct inode *inode, struct file *file)
+{
+ /*
+ * If we explicitly open a zone we must close it again as well, but the
+ * zone management operation can fail (either due to an IO error or as
+ * the zone has gone offline or read-only). Make sure we don't fail the
+ * close(2) for user-space.
+ */
+ if (zonefs_file_use_exp_open(inode, file))
+ zonefs_close_zone(inode);
+
+ return 0;
+}
+
static const struct file_operations zonefs_file_operations = {
- .open = generic_file_open,
+ .open = zonefs_file_open,
+ .release = zonefs_file_release,
.fsync = zonefs_file_fsync,
.mmap = zonefs_file_mmap,
.llseek = zonefs_file_llseek,
@@ -890,6 +1073,7 @@ static struct inode *zonefs_alloc_inode(struct super_block *sb)
inode_init_once(&zi->i_vnode);
mutex_init(&zi->i_truncate_mutex);
init_rwsem(&zi->i_mmap_sem);
+ zi->i_wr_refcnt = 0;
return &zi->i_vnode;
}
@@ -940,7 +1124,7 @@ static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf)
enum {
Opt_errors_ro, Opt_errors_zro, Opt_errors_zol, Opt_errors_repair,
- Opt_err,
+ Opt_explicit_open, Opt_err,
};
static const match_table_t tokens = {
@@ -948,6 +1132,7 @@ static const match_table_t tokens = {
{ Opt_errors_zro, "errors=zone-ro"},
{ Opt_errors_zol, "errors=zone-offline"},
{ Opt_errors_repair, "errors=repair"},
+ { Opt_explicit_open, "explicit-open" },
{ Opt_err, NULL}
};
@@ -984,6 +1169,9 @@ static int zonefs_parse_options(struct super_block *sb, char *options)
sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_REPAIR;
break;
+ case Opt_explicit_open:
+ sbi->s_mount_opts |= ZONEFS_MNTOPT_EXPLICIT_OPEN;
+ break;
default:
return -EINVAL;
}
@@ -1403,6 +1591,13 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_gid = GLOBAL_ROOT_GID;
sbi->s_perm = 0640;
sbi->s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO;
+ sbi->s_max_open_zones = bdev_max_open_zones(sb->s_bdev);
+ atomic_set(&sbi->s_open_zones, 0);
+ if (!sbi->s_max_open_zones &&
+ sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) {
+ zonefs_info(sb, "No open zones limit. Ignoring explicit_open mount option\n");
+ sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN;
+ }
ret = zonefs_read_super(sb);
if (ret)
diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h
index 55b39970acb2..51141907097c 100644
--- a/fs/zonefs/zonefs.h
+++ b/fs/zonefs/zonefs.h
@@ -38,6 +38,8 @@ static inline enum zonefs_ztype zonefs_zone_type(struct blk_zone *zone)
return ZONEFS_ZTYPE_SEQ;
}
+#define ZONEFS_ZONE_OPEN (1 << 0)
+
/*
* In-memory inode data.
*/
@@ -74,6 +76,10 @@ struct zonefs_inode_info {
*/
struct mutex i_truncate_mutex;
struct rw_semaphore i_mmap_sem;
+
+ /* guarded by i_truncate_mutex */
+ unsigned int i_wr_refcnt;
+ unsigned int i_flags;
};
static inline struct zonefs_inode_info *ZONEFS_I(struct inode *inode)
@@ -154,6 +160,7 @@ enum zonefs_features {
#define ZONEFS_MNTOPT_ERRORS_MASK \
(ZONEFS_MNTOPT_ERRORS_RO | ZONEFS_MNTOPT_ERRORS_ZRO | \
ZONEFS_MNTOPT_ERRORS_ZOL | ZONEFS_MNTOPT_ERRORS_REPAIR)
+#define ZONEFS_MNTOPT_EXPLICIT_OPEN (1 << 4) /* Explicit open/close of zones on open/close */
/*
* In-memory Super block information.
@@ -175,6 +182,9 @@ struct zonefs_sb_info {
loff_t s_blocks;
loff_t s_used_blocks;
+
+ unsigned int s_max_open_zones;
+ atomic_t s_open_zones;
};
static inline struct zonefs_sb_info *ZONEFS_SB(struct super_block *sb)
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index e1843976754a..cd14444bf600 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -734,7 +734,8 @@
THERMAL_TABLE(governor) \
EARLYCON_TABLE() \
LSM_TABLE() \
- EARLY_LSM_TABLE()
+ EARLY_LSM_TABLE() \
+ KUNIT_TABLE()
#define INIT_TEXT \
*(.init.text .init.text.*) \
@@ -932,6 +933,13 @@
KEEP(*(.con_initcall.init)) \
__con_initcall_end = .;
+/* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */
+#define KUNIT_TABLE() \
+ . = ALIGN(8); \
+ __kunit_suites_start = .; \
+ KEEP(*(.kunit_test_suites)) \
+ __kunit_suites_end = .;
+
#ifdef CONFIG_BLK_DEV_INITRD
#define INIT_RAM_FS \
. = ALIGN(4); \
diff --git a/include/dt-bindings/power/summit,smb347-charger.h b/include/dt-bindings/power/summit,smb347-charger.h
new file mode 100644
index 000000000000..d918bf321a71
--- /dev/null
+++ b/include/dt-bindings/power/summit,smb347-charger.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: (GPL-2.0-or-later or MIT) */
+/*
+ * Author: David Heidelberg <david@ixit.cz>
+ */
+
+#ifndef _DT_BINDINGS_SMB347_CHARGER_H
+#define _DT_BINDINGS_SMB347_CHARGER_H
+
+/* Charging compensation method */
+#define SMB3XX_SOFT_TEMP_COMPENSATE_NONE 0
+#define SMB3XX_SOFT_TEMP_COMPENSATE_CURRENT 1
+#define SMB3XX_SOFT_TEMP_COMPENSATE_VOLTAGE 2
+
+/* Charging enable control */
+#define SMB3XX_CHG_ENABLE_SW 0
+#define SMB3XX_CHG_ENABLE_PIN_ACTIVE_LOW 1
+#define SMB3XX_CHG_ENABLE_PIN_ACTIVE_HIGH 2
+
+#endif
diff --git a/include/kunit/test.h b/include/kunit/test.h
index 3391f38389f8..a423fffefea0 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -25,6 +25,7 @@ typedef void (*kunit_resource_free_t)(struct kunit_resource *);
/**
* struct kunit_resource - represents a *test managed resource*
* @data: for the user to store arbitrary data.
+ * @name: optional name
* @free: a user supplied function to free the resource. Populated by
* kunit_resource_alloc().
*
@@ -80,10 +81,10 @@ typedef void (*kunit_resource_free_t)(struct kunit_resource *);
*/
struct kunit_resource {
void *data;
- const char *name; /* optional name */
+ const char *name;
+ kunit_resource_free_t free;
/* private: internal use only. */
- kunit_resource_free_t free;
struct kref refcount;
struct list_head node;
};
@@ -238,10 +239,19 @@ size_t kunit_suite_num_test_cases(struct kunit_suite *suite);
unsigned int kunit_test_case_num(struct kunit_suite *suite,
struct kunit_case *test_case);
-int __kunit_test_suites_init(struct kunit_suite **suites);
+int __kunit_test_suites_init(struct kunit_suite * const * const suites);
void __kunit_test_suites_exit(struct kunit_suite **suites);
+#if IS_BUILTIN(CONFIG_KUNIT)
+int kunit_run_all_tests(void);
+#else
+static inline int kunit_run_all_tests(void)
+{
+ return 0;
+}
+#endif /* IS_BUILTIN(CONFIG_KUNIT) */
+
/**
* kunit_test_suites() - used to register one or more &struct kunit_suite
* with KUnit.
@@ -251,34 +261,57 @@ void __kunit_test_suites_exit(struct kunit_suite **suites);
* Registers @suites_list with the test framework. See &struct kunit_suite for
* more information.
*
- * When builtin, KUnit tests are all run as late_initcalls; this means
- * that they cannot test anything where tests must run at a different init
- * phase. One significant restriction resulting from this is that KUnit
- * cannot reliably test anything that is initialize in the late_init phase;
- * another is that KUnit is useless to test things that need to be run in
- * an earlier init phase.
- *
- * An alternative is to build the tests as a module. Because modules
- * do not support multiple late_initcall()s, we need to initialize an
- * array of suites for a module.
- *
- * TODO(brendanhiggins@google.com): Don't run all KUnit tests as
- * late_initcalls. I have some future work planned to dispatch all KUnit
- * tests from the same place, and at the very least to do so after
- * everything else is definitely initialized.
+ * If a test suite is built-in, module_init() gets translated into
+ * an initcall which we don't want as the idea is that for builtins
+ * the executor will manage execution. So ensure we do not define
+ * module_{init|exit} functions for the builtin case when registering
+ * suites via kunit_test_suites() below.
*/
-#define kunit_test_suites(suites_list...) \
- static struct kunit_suite *suites[] = {suites_list, NULL}; \
- static int kunit_test_suites_init(void) \
+#ifdef MODULE
+#define kunit_test_suites_for_module(__suites) \
+ static int __init kunit_test_suites_init(void) \
{ \
- return __kunit_test_suites_init(suites); \
+ return __kunit_test_suites_init(__suites); \
} \
- late_initcall(kunit_test_suites_init); \
+ module_init(kunit_test_suites_init); \
+ \
static void __exit kunit_test_suites_exit(void) \
{ \
- return __kunit_test_suites_exit(suites); \
+ return __kunit_test_suites_exit(__suites); \
} \
module_exit(kunit_test_suites_exit)
+#else
+#define kunit_test_suites_for_module(__suites)
+#endif /* MODULE */
+
+#define __kunit_test_suites(unique_array, unique_suites, ...) \
+ static struct kunit_suite *unique_array[] = { __VA_ARGS__, NULL }; \
+ kunit_test_suites_for_module(unique_array); \
+ static struct kunit_suite **unique_suites \
+ __used __section(.kunit_test_suites) = unique_array
+
+/**
+ * kunit_test_suites() - used to register one or more &struct kunit_suite
+ * with KUnit.
+ *
+ * @suites: a statically allocated list of &struct kunit_suite.
+ *
+ * Registers @suites with the test framework. See &struct kunit_suite for
+ * more information.
+ *
+ * When builtin, KUnit tests are all run via executor; this is done
+ * by placing the array of struct kunit_suite * in the .kunit_test_suites
+ * ELF section.
+ *
+ * An alternative is to build the tests as a module. Because modules do not
+ * support multiple initcall()s, we need to initialize an array of suites for a
+ * module.
+ *
+ */
+#define kunit_test_suites(...) \
+ __kunit_test_suites(__UNIQUE_ID(array), \
+ __UNIQUE_ID(suites), \
+ __VA_ARGS__)
#define kunit_test_suite(suite) kunit_test_suites(&suite)
@@ -348,6 +381,7 @@ static inline void kunit_put_resource(struct kunit_resource *res)
* none is supplied, the resource data value is simply set to @data.
* If an init function is supplied, @data is passed to it instead.
* @free: a user-supplied function to free the resource (if needed).
+ * @res: The resource.
* @data: value to pass to init function or set in resource data field.
*/
int kunit_add_resource(struct kunit *test,
@@ -361,7 +395,9 @@ int kunit_add_resource(struct kunit *test,
* @test: The test context object.
* @init: a user-supplied function to initialize the resource data, if needed.
* @free: a user-supplied function to free the resource data, if needed.
- * @name_data: name and data to be set for resource.
+ * @res: The resource.
+ * @name: name to be set for resource.
+ * @data: value to pass to init function or set in resource data field.
*/
int kunit_add_named_resource(struct kunit *test,
kunit_resource_init_t init,
@@ -499,8 +535,8 @@ static inline int kunit_destroy_named_resource(struct kunit *test,
}
/**
- * kunit_remove_resource: remove resource from resource list associated with
- * test.
+ * kunit_remove_resource() - remove resource from resource list associated with
+ * test.
* @test: The test context object.
* @res: The resource to be removed.
*
diff --git a/include/linux/bcm47xx_sprom.h b/include/linux/bcm47xx_sprom.h
index b0f4424f34fc..f8254fd53e15 100644
--- a/include/linux/bcm47xx_sprom.h
+++ b/include/linux/bcm47xx_sprom.h
@@ -9,9 +9,19 @@
#include <linux/kernel.h>
#include <linux/vmalloc.h>
+struct ssb_sprom;
+
#ifdef CONFIG_BCM47XX_SPROM
+void bcm47xx_fill_sprom(struct ssb_sprom *sprom, const char *prefix,
+ bool fallback);
int bcm47xx_sprom_register_fallbacks(void);
#else
+static inline void bcm47xx_fill_sprom(struct ssb_sprom *sprom,
+ const char *prefix,
+ bool fallback)
+{
+}
+
static inline int bcm47xx_sprom_register_fallbacks(void)
{
return -ENOTSUPP;
diff --git a/include/linux/bcm963xx_tag.h b/include/linux/bcm963xx_tag.h
index b87945cb6946..7edb809a2586 100644
--- a/include/linux/bcm963xx_tag.h
+++ b/include/linux/bcm963xx_tag.h
@@ -84,7 +84,7 @@ struct bcm_tag {
char flash_layout_ver[FLASHLAYOUTVER_LEN];
/* 196-199: kernel+rootfs CRC32 */
__u32 fskernel_crc;
- /* 200-215: Unused except on Alice Gate where is is information */
+ /* 200-215: Unused except on Alice Gate where it is information */
char information2[TAGINFO2_LEN];
/* 216-219: CRC32 of image less imagetag (kernel for Alice Gate) */
__u32 image_crc;
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 99f2ac30b1d9..5b74bdf159d6 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -188,12 +188,10 @@ static inline unsigned fls_long(unsigned long l)
static inline int get_count_order(unsigned int count)
{
- int order;
+ if (count == 0)
+ return -1;
- order = fls(count) - 1;
- if (count & (count - 1))
- order++;
- return order;
+ return fls(--count);
}
/**
@@ -206,10 +204,7 @@ static inline int get_count_order_long(unsigned long l)
{
if (l == 0UL)
return -1;
- else if (l & (l - 1UL))
- return (int)fls_long(l);
- else
- return (int)fls_long(l) - 1;
+ return (int)fls_long(--l);
}
/**
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c09375e0a0eb..639cae2c158b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -8,6 +8,7 @@
#include <linux/genhd.h>
#include <linux/list.h>
#include <linux/llist.h>
+#include <linux/minmax.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/pagemap.h>
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index dd74503f7e5e..2efec10bf792 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -7,10 +7,14 @@
#ifndef __LINUX_BVEC_ITER_H
#define __LINUX_BVEC_ITER_H
-#include <linux/kernel.h>
#include <linux/bug.h>
#include <linux/errno.h>
+#include <linux/limits.h>
+#include <linux/minmax.h>
#include <linux/mm.h>
+#include <linux/types.h>
+
+struct page;
/**
* struct bio_vec - a contiguous range of physical memory addresses
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 7a899e83835d..e58e8c207782 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -7,6 +7,12 @@
#include <linux/fs.h>
#include <asm/siginfo.h>
+struct core_vma_metadata {
+ unsigned long start, end;
+ unsigned long flags;
+ unsigned long dump_size;
+};
+
/*
* These are the only things you should do on a core-file: use only these
* functions to write out all the necessary info.
@@ -16,6 +22,11 @@ extern int dump_skip(struct coredump_params *cprm, size_t nr);
extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr);
extern int dump_align(struct coredump_params *cprm, int align);
extern void dump_truncate(struct coredump_params *cprm);
+int dump_user_range(struct coredump_params *cprm, unsigned long start,
+ unsigned long len);
+int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
+ struct core_vma_metadata **vma_meta,
+ size_t *vma_data_size_ptr);
#ifdef CONFIG_COREDUMP
extern void do_coredump(const kernel_siginfo_t *siginfo);
#else
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 6f524bbf71a2..bc56287a1ed1 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -183,6 +183,7 @@ enum cpuhp_state {
CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE,
CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE,
+ CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE,
CPUHP_AP_WATCHDOG_ONLINE,
CPUHP_AP_WORKQUEUE_ONLINE,
CPUHP_AP_RCUTREE_ONLINE,
diff --git a/include/linux/dax.h b/include/linux/dax.h
index e15357223565..b52f084aa643 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -149,6 +149,7 @@ int dax_writeback_mapping_range(struct address_space *mapping,
struct dax_device *dax_dev, struct writeback_control *wbc);
struct page *dax_layout_busy_page(struct address_space *mapping);
+struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end);
dax_entry_t dax_lock_page(struct page *page);
void dax_unlock_page(struct page *page, dax_entry_t cookie);
#else
@@ -179,6 +180,11 @@ static inline struct page *dax_layout_busy_page(struct address_space *mapping)
return NULL;
}
+static inline struct page *dax_layout_busy_page_range(struct address_space *mapping, pgoff_t start, pgoff_t nr_pages)
+{
+ return NULL;
+}
+
static inline int dax_writeback_mapping_range(struct address_space *mapping,
struct dax_device *dax_dev, struct writeback_control *wbc)
{
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 2f4a74efa6be..121a2430d7f7 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -228,12 +228,7 @@ int devfreq_resume_device(struct devfreq *devfreq);
void devfreq_suspend(void);
void devfreq_resume(void);
-/**
- * update_devfreq() - Reevaluate the device and configure frequency
- * @devfreq: the devfreq device
- *
- * Note: devfreq->lock must be held
- */
+/* update_devfreq() - Reevaluate the device and configure frequency */
int update_devfreq(struct devfreq *devfreq);
/* Helper functions for devfreq user device driver with OPP. */
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 3c383ddd92dd..a5dbb57a687f 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -38,9 +38,6 @@
#define F2FS_MAX_QUOTAS 3
#define F2FS_ENC_UTF8_12_1 1
-#define F2FS_ENC_STRICT_MODE_FL (1 << 0)
-#define f2fs_has_strict_mode(sbi) \
- (sbi->s_encoding_flags & F2FS_ENC_STRICT_MODE_FL)
#define F2FS_IO_SIZE(sbi) (1 << F2FS_OPTION(sbi).write_io_size_bits) /* Blocks */
#define F2FS_IO_SIZE_KB(sbi) (1 << (F2FS_OPTION(sbi).write_io_size_bits + 2)) /* KB */
diff --git a/include/linux/fault-inject-usercopy.h b/include/linux/fault-inject-usercopy.h
new file mode 100644
index 000000000000..56c3a693fdd9
--- /dev/null
+++ b/include/linux/fault-inject-usercopy.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_FAULT_INJECT_USERCOPY_H__
+#define __LINUX_FAULT_INJECT_USERCOPY_H__
+
+/*
+ * This header provides a wrapper for injecting failures to user space memory
+ * access functions.
+ */
+
+#include <linux/types.h>
+
+#ifdef CONFIG_FAULT_INJECTION_USERCOPY
+
+bool should_fail_usercopy(void);
+
+#else
+
+static inline bool should_fail_usercopy(void) { return false; }
+
+#endif /* CONFIG_FAULT_INJECTION_USERCOPY */
+
+#endif /* __LINUX_FAULT_INJECT_USERCOPY_H__ */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ae97d87a00d2..c4ae9cafbbba 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1366,6 +1366,12 @@ extern int send_sigurg(struct fown_struct *fown);
#define SB_ACTIVE (1<<30)
#define SB_NOUSER (1<<31)
+/* These flags relate to encoding and casefolding */
+#define SB_ENC_STRICT_MODE_FL (1 << 0)
+
+#define sb_has_strict_encoding(sb) \
+ (sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL)
+
/*
* Umount options
*/
@@ -1436,6 +1442,10 @@ struct super_block {
#ifdef CONFIG_FS_VERITY
const struct fsverity_operations *s_vop;
#endif
+#ifdef CONFIG_UNICODE
+ struct unicode_map *s_encoding;
+ __u16 s_encoding_flags;
+#endif
struct hlist_bl_head s_roots; /* alternate root dentries for NFS */
struct list_head s_mounts; /* list of mounts; _not_ for fs use */
struct block_device *s_bdev;
@@ -2209,6 +2219,7 @@ struct file_system_type {
#define FS_HAS_SUBTYPE 4
#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
#define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */
+#define FS_THP_SUPPORT 8192 /* Remove once all fs converted */
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
int (*init_fs_context)(struct fs_context *);
const struct fs_parameter_spec *parameters;
@@ -2696,33 +2707,6 @@ static inline errseq_t file_sample_sb_err(struct file *file)
return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err);
}
-static inline int filemap_nr_thps(struct address_space *mapping)
-{
-#ifdef CONFIG_READ_ONLY_THP_FOR_FS
- return atomic_read(&mapping->nr_thps);
-#else
- return 0;
-#endif
-}
-
-static inline void filemap_nr_thps_inc(struct address_space *mapping)
-{
-#ifdef CONFIG_READ_ONLY_THP_FOR_FS
- atomic_inc(&mapping->nr_thps);
-#else
- WARN_ON_ONCE(1);
-#endif
-}
-
-static inline void filemap_nr_thps_dec(struct address_space *mapping)
-{
-#ifdef CONFIG_READ_ONLY_THP_FOR_FS
- atomic_dec(&mapping->nr_thps);
-#else
- WARN_ON_ONCE(1);
-#endif
-}
-
extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
int datasync);
extern int vfs_fsync(struct file *file, int datasync);
@@ -3215,6 +3199,12 @@ extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
extern int generic_check_addressable(unsigned, u64);
+#ifdef CONFIG_UNICODE
+extern int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str);
+extern int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
+ const char *str, const struct qstr *name);
+#endif
+
#ifdef CONFIG_MIGRATION
extern int buffer_migrate_page(struct address_space *,
struct page *, struct page *,
diff --git a/include/linux/idle_inject.h b/include/linux/idle_inject.h
index 91a8612b8bf9..fb88e23a99d3 100644
--- a/include/linux/idle_inject.h
+++ b/include/linux/idle_inject.h
@@ -28,6 +28,6 @@ void idle_inject_get_duration(struct idle_inject_device *ii_dev,
unsigned int *idle_duration_us);
void idle_inject_set_latency(struct idle_inject_device *ii_dev,
- unsigned int latency_ns);
+ unsigned int latency_us);
#endif /* __IDLE_INJECT_H__ */
diff --git a/include/linux/idr.h b/include/linux/idr.h
index 3ade03e5c7af..a0dce14090a9 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -263,7 +263,8 @@ void ida_destroy(struct ida *ida);
*
* Allocate an ID between 0 and %INT_MAX, inclusive.
*
- * Context: Any context.
+ * Context: Any context. It is safe to call this function without
+ * locking in your code.
* Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
* or %-ENOSPC if there are no free IDs.
*/
@@ -280,7 +281,8 @@ static inline int ida_alloc(struct ida *ida, gfp_t gfp)
*
* Allocate an ID between @min and %INT_MAX, inclusive.
*
- * Context: Any context.
+ * Context: Any context. It is safe to call this function without
+ * locking in your code.
* Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
* or %-ENOSPC if there are no free IDs.
*/
@@ -297,7 +299,8 @@ static inline int ida_alloc_min(struct ida *ida, unsigned int min, gfp_t gfp)
*
* Allocate an ID between 0 and @max, inclusive.
*
- * Context: Any context.
+ * Context: Any context. It is safe to call this function without
+ * locking in your code.
* Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
* or %-ENOSPC if there are no free IDs.
*/
@@ -311,6 +314,10 @@ static inline void ida_init(struct ida *ida)
xa_init_flags(&ida->xa, IDA_INIT_FLAGS);
}
+/*
+ * ida_simple_get() and ida_simple_remove() are deprecated. Use
+ * ida_alloc() and ida_free() instead respectively.
+ */
#define ida_simple_get(ida, start, end, gfp) \
ida_alloc_range(ida, start, (end) - 1, gfp)
#define ida_simple_remove(ida, id) ida_free(ida, id)
diff --git a/include/linux/input/sparse-keymap.h b/include/linux/input/sparse-keymap.h
index d25d1452dc6e..d0dddc14ebc8 100644
--- a/include/linux/input/sparse-keymap.h
+++ b/include/linux/input/sparse-keymap.h
@@ -20,6 +20,7 @@
* private definitions.
* @code: Device-specific data identifying the button/switch
* @keycode: KEY_* code assigned to a key/button
+ * @sw: struct with code/value used by KE_SW and KE_VSW
* @sw.code: SW_* code assigned to a switch
* @sw.value: Value that should be sent in an input even when KE_SW
* switch is toggled. KE_VSW switches ignore this field and
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 96315cfaf6d1..868364cea3b7 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -4,18 +4,33 @@
#include <linux/sched.h>
#include <linux/xarray.h>
-#include <linux/percpu-refcount.h>
+
+struct io_identity {
+ struct files_struct *files;
+ struct mm_struct *mm;
+#ifdef CONFIG_BLK_CGROUP
+ struct cgroup_subsys_state *blkcg_css;
+#endif
+ const struct cred *creds;
+ struct nsproxy *nsproxy;
+ struct fs_struct *fs;
+ unsigned long fsize;
+#ifdef CONFIG_AUDIT
+ kuid_t loginuid;
+ unsigned int sessionid;
+#endif
+ refcount_t count;
+};
struct io_uring_task {
/* submission side */
struct xarray xa;
struct wait_queue_head wait;
struct file *last;
- atomic_long_t req_issue;
-
- /* completion side */
- bool in_idle ____cacheline_aligned_in_smp;
- atomic_long_t req_complete;
+ struct percpu_counter inflight;
+ struct io_identity __identity;
+ struct io_identity *identity;
+ bool in_idle;
};
#if defined(CONFIG_IO_URING)
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 6c2b06fe8beb..5135d4b86cd6 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -58,6 +58,10 @@ struct resource {
#define IORESOURCE_EXT_TYPE_BITS 0x01000000 /* Resource extended types */
#define IORESOURCE_SYSRAM 0x01000000 /* System RAM (modifier) */
+/* IORESOURCE_SYSRAM specific bits. */
+#define IORESOURCE_SYSRAM_DRIVER_MANAGED 0x02000000 /* Always detected via a driver. */
+#define IORESOURCE_SYSRAM_MERGEABLE 0x04000000 /* Resource can be merged. */
+
#define IORESOURCE_EXCLUSIVE 0x08000000 /* Userland may not map this resource */
#define IORESOURCE_DISABLED 0x10000000
@@ -103,7 +107,6 @@ struct resource {
#define IORESOURCE_MEM_32BIT (3<<3)
#define IORESOURCE_MEM_SHADOWABLE (1<<5) /* dup: IORESOURCE_SHADOWABLE */
#define IORESOURCE_MEM_EXPANSIONROM (1<<6)
-#define IORESOURCE_MEM_DRIVER_MANAGED (1<<7)
/* PnP I/O specific bits (IORESOURCE_BITS) */
#define IORESOURCE_IO_16BIT_ADDR (1<<0)
@@ -248,8 +251,10 @@ extern struct resource * __request_region(struct resource *,
extern void __release_region(struct resource *, resource_size_t,
resource_size_t);
#ifdef CONFIG_MEMORY_HOTREMOVE
-extern int release_mem_region_adjustable(struct resource *, resource_size_t,
- resource_size_t);
+extern void release_mem_region_adjustable(resource_size_t, resource_size_t);
+#endif
+#ifdef CONFIG_MEMORY_HOTPLUG
+extern void merge_system_ram_resource(struct resource *res);
#endif
/* Wrappers for managed devices */
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index fed6ba96c527..5e13f801c902 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -3,8 +3,9 @@
#define _LINUX_JIFFIES_H
#include <linux/cache.h>
+#include <linux/limits.h>
#include <linux/math64.h>
-#include <linux/kernel.h>
+#include <linux/minmax.h>
#include <linux/types.h>
#include <linux/time.h>
#include <linux/timex.h>
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index e4aa29b1ad62..c629215fdad9 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -11,6 +11,7 @@
#include <linux/compiler.h>
#include <linux/bitops.h>
#include <linux/log2.h>
+#include <linux/minmax.h>
#include <linux/typecheck.h>
#include <linux/printk.h>
#include <linux/build_bug.h>
@@ -833,155 +834,6 @@ ftrace_vprintk(const char *fmt, va_list ap)
static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
#endif /* CONFIG_TRACING */
-/*
- * min()/max()/clamp() macros must accomplish three things:
- *
- * - avoid multiple evaluations of the arguments (so side-effects like
- * "x++" happen only once) when non-constant.
- * - perform strict type-checking (to generate warnings instead of
- * nasty runtime surprises). See the "unnecessary" pointer comparison
- * in __typecheck().
- * - retain result as a constant expressions when called with only
- * constant expressions (to avoid tripping VLA warnings in stack
- * allocation usage).
- */
-#define __typecheck(x, y) \
- (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
-
-/*
- * This returns a constant expression while determining if an argument is
- * a constant expression, most importantly without evaluating the argument.
- * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de>
- */
-#define __is_constexpr(x) \
- (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
-
-#define __no_side_effects(x, y) \
- (__is_constexpr(x) && __is_constexpr(y))
-
-#define __safe_cmp(x, y) \
- (__typecheck(x, y) && __no_side_effects(x, y))
-
-#define __cmp(x, y, op) ((x) op (y) ? (x) : (y))
-
-#define __cmp_once(x, y, unique_x, unique_y, op) ({ \
- typeof(x) unique_x = (x); \
- typeof(y) unique_y = (y); \
- __cmp(unique_x, unique_y, op); })
-
-#define __careful_cmp(x, y, op) \
- __builtin_choose_expr(__safe_cmp(x, y), \
- __cmp(x, y, op), \
- __cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op))
-
-/**
- * min - return minimum of two values of the same or compatible types
- * @x: first value
- * @y: second value
- */
-#define min(x, y) __careful_cmp(x, y, <)
-
-/**
- * max - return maximum of two values of the same or compatible types
- * @x: first value
- * @y: second value
- */
-#define max(x, y) __careful_cmp(x, y, >)
-
-/**
- * min3 - return minimum of three values
- * @x: first value
- * @y: second value
- * @z: third value
- */
-#define min3(x, y, z) min((typeof(x))min(x, y), z)
-
-/**
- * max3 - return maximum of three values
- * @x: first value
- * @y: second value
- * @z: third value
- */
-#define max3(x, y, z) max((typeof(x))max(x, y), z)
-
-/**
- * min_not_zero - return the minimum that is _not_ zero, unless both are zero
- * @x: value1
- * @y: value2
- */
-#define min_not_zero(x, y) ({ \
- typeof(x) __x = (x); \
- typeof(y) __y = (y); \
- __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
-
-/**
- * clamp - return a value clamped to a given range with strict typechecking
- * @val: current value
- * @lo: lowest allowable value
- * @hi: highest allowable value
- *
- * This macro does strict typechecking of @lo/@hi to make sure they are of the
- * same type as @val. See the unnecessary pointer comparisons.
- */
-#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
-
-/*
- * ..and if you can't take the strict
- * types, you can specify one yourself.
- *
- * Or not use min/max/clamp at all, of course.
- */
-
-/**
- * min_t - return minimum of two values, using the specified type
- * @type: data type to use
- * @x: first value
- * @y: second value
- */
-#define min_t(type, x, y) __careful_cmp((type)(x), (type)(y), <)
-
-/**
- * max_t - return maximum of two values, using the specified type
- * @type: data type to use
- * @x: first value
- * @y: second value
- */
-#define max_t(type, x, y) __careful_cmp((type)(x), (type)(y), >)
-
-/**
- * clamp_t - return a value clamped to a given range using a given type
- * @type: the type of variable to use
- * @val: current value
- * @lo: minimum allowable value
- * @hi: maximum allowable value
- *
- * This macro does no typechecking and uses temporary variables of type
- * @type to make all the comparisons.
- */
-#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
-
-/**
- * clamp_val - return a value clamped to a given range using val's type
- * @val: current value
- * @lo: minimum allowable value
- * @hi: maximum allowable value
- *
- * This macro does no typechecking and uses temporary variables of whatever
- * type the input argument @val is. This is useful when @val is an unsigned
- * type and @lo and @hi are literals that will otherwise be assigned a signed
- * integer type.
- */
-#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi)
-
-
-/**
- * swap - swap values of @a and @b
- * @a: first value
- * @b: second value
- */
-#define swap(a, b) \
- do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
-
/* This counts to 12. Any more, it will return 13th argument. */
#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n
#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 477b8b7c908f..0d6cf64c8bb1 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -16,6 +16,7 @@
#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/atomic.h>
+#include <linux/kprobes.h>
#ifdef CONFIG_HAVE_ARCH_KGDB
#include <asm/kgdb.h>
#endif
@@ -335,6 +336,23 @@ extern int kgdb_nmicallin(int cpu, int trapnr, void *regs, int err_code,
atomic_t *snd_rdy);
extern void gdbstub_exit(int status);
+/*
+ * kgdb and kprobes both use the same (kprobe) blocklist (which makes sense
+ * given they are both typically hooked up to the same trap meaning on most
+ * architectures one cannot be used to debug the other)
+ *
+ * However on architectures where kprobes is not (yet) implemented we permit
+ * breakpoints everywhere rather than blocking everything by default.
+ */
+static inline bool kgdb_within_blocklist(unsigned long addr)
+{
+#ifdef CONFIG_KGDB_HONOUR_BLOCKLIST
+ return within_kprobe_blacklist(addr);
+#else
+ return false;
+#endif
+}
+
extern int kgdb_single_step;
extern atomic_t kgdb_active;
#define in_dbg_master() \
diff --git a/include/linux/list.h b/include/linux/list.h
index 0d0d17a10d25..a18c87b63376 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -610,6 +610,15 @@ static inline void list_splice_tail_init(struct list_head *list,
pos = n, n = pos->prev)
/**
+ * list_entry_is_head - test if the entry points to the head of the list
+ * @pos: the type * to cursor
+ * @head: the head for your list.
+ * @member: the name of the list_head within the struct.
+ */
+#define list_entry_is_head(pos, head, member) \
+ (&pos->member == (head))
+
+/**
* list_for_each_entry - iterate over list of given type
* @pos: the type * to use as a loop cursor.
* @head: the head for your list.
@@ -617,7 +626,7 @@ static inline void list_splice_tail_init(struct list_head *list,
*/
#define list_for_each_entry(pos, head, member) \
for (pos = list_first_entry(head, typeof(*pos), member); \
- &pos->member != (head); \
+ !list_entry_is_head(pos, head, member); \
pos = list_next_entry(pos, member))
/**
@@ -628,7 +637,7 @@ static inline void list_splice_tail_init(struct list_head *list,
*/
#define list_for_each_entry_reverse(pos, head, member) \
for (pos = list_last_entry(head, typeof(*pos), member); \
- &pos->member != (head); \
+ !list_entry_is_head(pos, head, member); \
pos = list_prev_entry(pos, member))
/**
@@ -653,7 +662,7 @@ static inline void list_splice_tail_init(struct list_head *list,
*/
#define list_for_each_entry_continue(pos, head, member) \
for (pos = list_next_entry(pos, member); \
- &pos->member != (head); \
+ !list_entry_is_head(pos, head, member); \
pos = list_next_entry(pos, member))
/**
@@ -667,7 +676,7 @@ static inline void list_splice_tail_init(struct list_head *list,
*/
#define list_for_each_entry_continue_reverse(pos, head, member) \
for (pos = list_prev_entry(pos, member); \
- &pos->member != (head); \
+ !list_entry_is_head(pos, head, member); \
pos = list_prev_entry(pos, member))
/**
@@ -679,7 +688,7 @@ static inline void list_splice_tail_init(struct list_head *list,
* Iterate over list of given type, continuing from current position.
*/
#define list_for_each_entry_from(pos, head, member) \
- for (; &pos->member != (head); \
+ for (; !list_entry_is_head(pos, head, member); \
pos = list_next_entry(pos, member))
/**
@@ -692,7 +701,7 @@ static inline void list_splice_tail_init(struct list_head *list,
* Iterate backwards over list of given type, continuing from current position.
*/
#define list_for_each_entry_from_reverse(pos, head, member) \
- for (; &pos->member != (head); \
+ for (; !list_entry_is_head(pos, head, member); \
pos = list_prev_entry(pos, member))
/**
@@ -705,7 +714,7 @@ static inline void list_splice_tail_init(struct list_head *list,
#define list_for_each_entry_safe(pos, n, head, member) \
for (pos = list_first_entry(head, typeof(*pos), member), \
n = list_next_entry(pos, member); \
- &pos->member != (head); \
+ !list_entry_is_head(pos, head, member); \
pos = n, n = list_next_entry(n, member))
/**
@@ -721,7 +730,7 @@ static inline void list_splice_tail_init(struct list_head *list,
#define list_for_each_entry_safe_continue(pos, n, head, member) \
for (pos = list_next_entry(pos, member), \
n = list_next_entry(pos, member); \
- &pos->member != (head); \
+ !list_entry_is_head(pos, head, member); \
pos = n, n = list_next_entry(n, member))
/**
@@ -736,7 +745,7 @@ static inline void list_splice_tail_init(struct list_head *list,
*/
#define list_for_each_entry_safe_from(pos, n, head, member) \
for (n = list_next_entry(pos, member); \
- &pos->member != (head); \
+ !list_entry_is_head(pos, head, member); \
pos = n, n = list_next_entry(n, member))
/**
@@ -752,7 +761,7 @@ static inline void list_splice_tail_init(struct list_head *list,
#define list_for_each_entry_safe_reverse(pos, n, head, member) \
for (pos = list_last_entry(head, typeof(*pos), member), \
n = list_prev_entry(pos, member); \
- &pos->member != (head); \
+ !list_entry_is_head(pos, head, member); \
pos = n, n = list_prev_entry(n, member))
/**
diff --git a/include/linux/math64.h b/include/linux/math64.h
index 3381d9e33c4e..66deb1fdc2ef 100644
--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@@ -28,7 +28,7 @@ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
return dividend / divisor;
}
-/**
+/*
* div_s64_rem - signed 64bit divide with 32bit divisor with remainder
* @dividend: signed 64bit dividend
* @divisor: signed 32bit divisor
@@ -42,7 +42,7 @@ static inline s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder)
return dividend / divisor;
}
-/**
+/*
* div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder
* @dividend: unsigned 64bit dividend
* @divisor: unsigned 64bit divisor
@@ -56,7 +56,7 @@ static inline u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder)
return dividend / divisor;
}
-/**
+/*
* div64_u64 - unsigned 64bit divide with 64bit divisor
* @dividend: unsigned 64bit dividend
* @divisor: unsigned 64bit divisor
@@ -68,7 +68,7 @@ static inline u64 div64_u64(u64 dividend, u64 divisor)
return dividend / divisor;
}
-/**
+/*
* div64_s64 - signed 64bit divide with 64bit divisor
* @dividend: signed 64bit dividend
* @divisor: signed 64bit divisor
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6ef4a552e09d..e391e3c56de5 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1531,18 +1531,6 @@ static inline bool memcg_kmem_enabled(void)
return static_branch_likely(&memcg_kmem_enabled_key);
}
-static inline bool memcg_kmem_bypass(void)
-{
- if (in_interrupt())
- return true;
-
- /* Allow remote memcg charging in kthread contexts. */
- if ((!current->mm || (current->flags & PF_KTHREAD)) &&
- !current->active_memcg)
- return true;
- return false;
-}
-
static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp,
int order)
{
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index c0faa7a30c46..d65c6fdc5cfc 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -57,6 +57,19 @@ enum {
MMOP_ONLINE_MOVABLE,
};
+/* Flags for add_memory() and friends to specify memory hotplug details. */
+typedef int __bitwise mhp_t;
+
+/* No special request */
+#define MHP_NONE ((__force mhp_t)0)
+/*
+ * Allow merging of the added System RAM resource with adjacent,
+ * mergeable resources. After a successful call to add_memory_resource()
+ * with this flag set, the resource pointer must no longer be used as it
+ * might be stale, or the resource might have changed.
+ */
+#define MEMHP_MERGE_RESOURCE ((__force mhp_t)BIT(0))
+
/*
* Extended parameters for memory hotplug:
* altmap: alternative allocator for memmap array (optional)
@@ -103,8 +116,8 @@ extern int online_pages(unsigned long pfn, unsigned long nr_pages,
int online_type, int nid);
extern struct zone *test_pages_in_a_zone(unsigned long start_pfn,
unsigned long end_pfn);
-extern unsigned long __offline_isolated_pages(unsigned long start_pfn,
- unsigned long end_pfn);
+extern void __offline_isolated_pages(unsigned long start_pfn,
+ unsigned long end_pfn);
typedef void (*online_page_callback_t)(struct page *page, unsigned int order);
@@ -247,13 +260,6 @@ static inline void zone_span_writelock(struct zone *zone) {}
static inline void zone_span_writeunlock(struct zone *zone) {}
static inline void zone_seqlock_init(struct zone *zone) {}
-static inline int mhp_notimplemented(const char *func)
-{
- printk(KERN_WARNING "%s() called, with CONFIG_MEMORY_HOTPLUG disabled\n", func);
- dump_stack();
- return -ENOSYS;
-}
-
static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
{
}
@@ -344,14 +350,18 @@ static inline void __remove_memory(int nid, u64 start, u64 size) {}
extern void set_zone_contiguous(struct zone *zone);
extern void clear_zone_contiguous(struct zone *zone);
+#ifdef CONFIG_MEMORY_HOTPLUG
extern void __ref free_area_init_core_hotplug(int nid);
-extern int __add_memory(int nid, u64 start, u64 size);
-extern int add_memory(int nid, u64 start, u64 size);
-extern int add_memory_resource(int nid, struct resource *resource);
+extern int __add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags);
+extern int add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags);
+extern int add_memory_resource(int nid, struct resource *resource,
+ mhp_t mhp_flags);
extern int add_memory_driver_managed(int nid, u64 start, u64 size,
- const char *resource_name);
+ const char *resource_name,
+ mhp_t mhp_flags);
extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
- unsigned long nr_pages, struct vmem_altmap *altmap);
+ unsigned long nr_pages,
+ struct vmem_altmap *altmap, int migratetype);
extern void remove_pfn_range_from_zone(struct zone *zone,
unsigned long start_pfn,
unsigned long nr_pages);
@@ -363,8 +373,8 @@ extern void sparse_remove_section(struct mem_section *ms,
unsigned long map_offset, struct vmem_altmap *altmap);
extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
unsigned long pnum);
-extern bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages,
- int online_type);
extern struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_pfn,
unsigned long nr_pages);
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
#endif /* __LINUX_MEMORY_HOTPLUG_H */
diff --git a/include/linux/minmax.h b/include/linux/minmax.h
new file mode 100644
index 000000000000..c0f57b0c64d9
--- /dev/null
+++ b/include/linux/minmax.h
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_MINMAX_H
+#define _LINUX_MINMAX_H
+
+/*
+ * min()/max()/clamp() macros must accomplish three things:
+ *
+ * - avoid multiple evaluations of the arguments (so side-effects like
+ * "x++" happen only once) when non-constant.
+ * - perform strict type-checking (to generate warnings instead of
+ * nasty runtime surprises). See the "unnecessary" pointer comparison
+ * in __typecheck().
+ * - retain result as a constant expressions when called with only
+ * constant expressions (to avoid tripping VLA warnings in stack
+ * allocation usage).
+ */
+#define __typecheck(x, y) \
+ (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
+
+/*
+ * This returns a constant expression while determining if an argument is
+ * a constant expression, most importantly without evaluating the argument.
+ * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de>
+ */
+#define __is_constexpr(x) \
+ (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
+
+#define __no_side_effects(x, y) \
+ (__is_constexpr(x) && __is_constexpr(y))
+
+#define __safe_cmp(x, y) \
+ (__typecheck(x, y) && __no_side_effects(x, y))
+
+#define __cmp(x, y, op) ((x) op (y) ? (x) : (y))
+
+#define __cmp_once(x, y, unique_x, unique_y, op) ({ \
+ typeof(x) unique_x = (x); \
+ typeof(y) unique_y = (y); \
+ __cmp(unique_x, unique_y, op); })
+
+#define __careful_cmp(x, y, op) \
+ __builtin_choose_expr(__safe_cmp(x, y), \
+ __cmp(x, y, op), \
+ __cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op))
+
+/**
+ * min - return minimum of two values of the same or compatible types
+ * @x: first value
+ * @y: second value
+ */
+#define min(x, y) __careful_cmp(x, y, <)
+
+/**
+ * max - return maximum of two values of the same or compatible types
+ * @x: first value
+ * @y: second value
+ */
+#define max(x, y) __careful_cmp(x, y, >)
+
+/**
+ * min3 - return minimum of three values
+ * @x: first value
+ * @y: second value
+ * @z: third value
+ */
+#define min3(x, y, z) min((typeof(x))min(x, y), z)
+
+/**
+ * max3 - return maximum of three values
+ * @x: first value
+ * @y: second value
+ * @z: third value
+ */
+#define max3(x, y, z) max((typeof(x))max(x, y), z)
+
+/**
+ * min_not_zero - return the minimum that is _not_ zero, unless both are zero
+ * @x: value1
+ * @y: value2
+ */
+#define min_not_zero(x, y) ({ \
+ typeof(x) __x = (x); \
+ typeof(y) __y = (y); \
+ __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
+
+/**
+ * clamp - return a value clamped to a given range with strict typechecking
+ * @val: current value
+ * @lo: lowest allowable value
+ * @hi: highest allowable value
+ *
+ * This macro does strict typechecking of @lo/@hi to make sure they are of the
+ * same type as @val. See the unnecessary pointer comparisons.
+ */
+#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
+
+/*
+ * ..and if you can't take the strict
+ * types, you can specify one yourself.
+ *
+ * Or not use min/max/clamp at all, of course.
+ */
+
+/**
+ * min_t - return minimum of two values, using the specified type
+ * @type: data type to use
+ * @x: first value
+ * @y: second value
+ */
+#define min_t(type, x, y) __careful_cmp((type)(x), (type)(y), <)
+
+/**
+ * max_t - return maximum of two values, using the specified type
+ * @type: data type to use
+ * @x: first value
+ * @y: second value
+ */
+#define max_t(type, x, y) __careful_cmp((type)(x), (type)(y), >)
+
+/**
+ * clamp_t - return a value clamped to a given range using a given type
+ * @type: the type of variable to use
+ * @val: current value
+ * @lo: minimum allowable value
+ * @hi: maximum allowable value
+ *
+ * This macro does no typechecking and uses temporary variables of type
+ * @type to make all the comparisons.
+ */
+#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
+
+/**
+ * clamp_val - return a value clamped to a given range using val's type
+ * @val: current value
+ * @lo: minimum allowable value
+ * @hi: maximum allowable value
+ *
+ * This macro does no typechecking and uses temporary variables of whatever
+ * type the input argument @val is. This is useful when @val is an unsigned
+ * type and @lo and @hi are literals that will otherwise be assigned a signed
+ * integer type.
+ */
+#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi)
+
+/**
+ * swap - swap values of @a and @b
+ * @a: first value
+ * @b: second value
+ */
+#define swap(a, b) \
+ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
+#endif /* _LINUX_MINMAX_H */
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index de1ffb4804d6..651591a2965d 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -420,7 +420,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
u8 reserved_at_1a[0x2];
u8 ipsec_encrypt[0x1];
u8 ipsec_decrypt[0x1];
- u8 reserved_at_1e[0x2];
+ u8 sw_owner_v2[0x1];
+ u8 reserved_at_1f[0x1];
u8 termination_table_raw_traffic[0x1];
u8 reserved_at_21[0x1];
@@ -1430,7 +1431,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 log_bf_reg_size[0x5];
- u8 reserved_at_270[0x8];
+ u8 reserved_at_270[0x6];
+ u8 lag_dct[0x2];
u8 lag_tx_port_affinity[0x1];
u8 reserved_at_279[0x2];
u8 lag_master[0x1];
diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index 2d45a6af52a4..23edd2db4803 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -125,6 +125,14 @@ enum mlx5e_connector_type {
MLX5E_CONNECTOR_TYPE_NUMBER,
};
+enum mlx5_ptys_width {
+ MLX5_PTYS_WIDTH_1X = 1 << 0,
+ MLX5_PTYS_WIDTH_2X = 1 << 1,
+ MLX5_PTYS_WIDTH_4X = 1 << 2,
+ MLX5_PTYS_WIDTH_8X = 1 << 3,
+ MLX5_PTYS_WIDTH_12X = 1 << 4,
+};
+
#define MLX5E_PROT_MASK(link_mode) (1 << link_mode)
#define MLX5_GET_ETH_PROTO(reg, out, ext, field) \
(ext ? MLX5_GET(reg, out, ext_##field) : \
@@ -133,10 +141,9 @@ enum mlx5e_connector_type {
int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps);
int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys,
int ptys_size, int proto_mask, u8 local_port);
-int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev,
- u8 *link_width_oper, u8 local_port);
-int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
- u8 *proto_oper, u8 local_port);
+
+int mlx5_query_ib_port_oper(struct mlx5_core_dev *dev, u16 *link_width_oper,
+ u16 *proto_oper, u8 local_port);
void mlx5_toggle_port_link(struct mlx5_core_dev *dev);
int mlx5_set_port_admin_status(struct mlx5_core_dev *dev,
enum mlx5_port_status status);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 620961e4f32b..ef360fe70aaf 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2440,7 +2440,7 @@ extern int __meminit __early_pfn_to_nid(unsigned long pfn,
extern void set_dma_reserve(unsigned long new_dma_reserve);
extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long,
- enum meminit_context, struct vmem_altmap *);
+ enum meminit_context, struct vmem_altmap *, int migratetype);
extern void setup_per_zone_wmarks(void);
extern int __meminit init_per_zone_wmark_min(void);
extern void mem_init(void);
@@ -2579,7 +2579,7 @@ extern int __do_munmap(struct mm_struct *, unsigned long, size_t,
struct list_head *uf, bool downgrade);
extern int do_munmap(struct mm_struct *, unsigned long, size_t,
struct list_head *uf);
-extern int do_madvise(unsigned long start, size_t len_in, int behavior);
+extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior);
#ifdef CONFIG_MMU
extern int __mm_populate(unsigned long addr, unsigned long len,
@@ -3025,8 +3025,6 @@ extern int memory_failure(unsigned long pfn, int flags);
extern void memory_failure_queue(unsigned long pfn, int flags);
extern void memory_failure_queue_kick(int cpu);
extern int unpoison_memory(unsigned long pfn);
-extern int get_hwpoison_page(struct page *page);
-#define put_hwpoison_page(page) put_page(page)
extern int sysctl_memory_failure_early_kill;
extern int sysctl_memory_failure_recovery;
extern void shake_page(struct page *p, int access);
@@ -3066,6 +3064,7 @@ enum mf_action_page_type {
MF_MSG_BUDDY,
MF_MSG_BUDDY_2ND,
MF_MSG_DAX,
+ MF_MSG_UNSPLIT_THP,
MF_MSG_UNKNOWN,
};
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c27fb1faffe5..fb3bf696c05e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -266,6 +266,8 @@ static inline bool is_active_lru(enum lru_list lru)
return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE);
}
+#define ANON_AND_FILE 2
+
enum lruvec_flags {
LRUVEC_CONGESTED, /* lruvec has many dirty pages
* backed by a congested BDI
@@ -283,8 +285,8 @@ struct lruvec {
unsigned long file_cost;
/* Non-resident age, driven by LRU movement */
atomic_long_t nonresident_age;
- /* Refaults at the time of last reclaim cycle, anon=0, file=1 */
- unsigned long refaults[2];
+ /* Refaults at the time of last reclaim cycle */
+ unsigned long refaults[ANON_AND_FILE];
/* Various lruvec state flags (enum lruvec_flags) */
unsigned long flags;
#ifdef CONFIG_MEMCG
@@ -441,6 +443,8 @@ enum zone_type {
#ifndef __GENERATING_BOUNDS_H
+#define ASYNC_AND_SYNC 2
+
struct zone {
/* Read-mostly fields */
@@ -560,8 +564,8 @@ struct zone {
#if defined CONFIG_COMPACTION || defined CONFIG_CMA
/* pfn where compaction free scanner should start */
unsigned long compact_cached_free_pfn;
- /* pfn where async and sync compaction migration scanner should start */
- unsigned long compact_cached_migrate_pfn[2];
+ /* pfn where compaction migration scanner should start */
+ unsigned long compact_cached_migrate_pfn[ASYNC_AND_SYNC];
unsigned long compact_init_migrate_pfn;
unsigned long compact_init_free_pfn;
#endif
@@ -1416,7 +1420,6 @@ static inline unsigned long next_present_section_nr(unsigned long section_nr)
#define pfn_to_nid(pfn) (0)
#endif
-#define early_pfn_valid(pfn) pfn_valid(pfn)
void sparse_init(void);
#else
#define sparse_init() do {} while (0)
@@ -1436,10 +1439,6 @@ struct mminit_pfnnid_cache {
int last_nid;
};
-#ifndef early_pfn_valid
-#define early_pfn_valid(pfn) (1)
-#endif
-
/*
* If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we
* need to check pfn validity within that MAX_ORDER_NR_PAGES block.
diff --git a/include/linux/mtd/hyperbus.h b/include/linux/mtd/hyperbus.h
index 2129f7d3b6eb..0ce612428aea 100644
--- a/include/linux/mtd/hyperbus.h
+++ b/include/linux/mtd/hyperbus.h
@@ -8,6 +8,17 @@
#include <linux/mtd/map.h>
+/* HyperBus command bits */
+#define HYPERBUS_RW 0x80 /* R/W# */
+#define HYPERBUS_RW_WRITE 0
+#define HYPERBUS_RW_READ 0x80
+#define HYPERBUS_AS 0x40 /* Address Space */
+#define HYPERBUS_AS_MEM 0
+#define HYPERBUS_AS_REG 0x40
+#define HYPERBUS_BT 0x20 /* Burst Type */
+#define HYPERBUS_BT_WRAPPED 0
+#define HYPERBUS_BT_LINEAR 0x20
+
enum hyperbus_memtype {
HYPERFLASH,
HYPERRAM,
@@ -20,6 +31,7 @@ enum hyperbus_memtype {
* @mtd: pointer to MTD struct
* @ctlr: pointer to HyperBus controller struct
* @memtype: type of memory device: HyperFlash or HyperRAM
+ * @priv: pointer to controller specific per device private data
*/
struct hyperbus_device {
@@ -28,6 +40,7 @@ struct hyperbus_device {
struct mtd_info *mtd;
struct hyperbus_ctlr *ctlr;
enum hyperbus_memtype memtype;
+ void *priv;
};
/**
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index af99041ceaa9..697ea2474a7c 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -83,7 +83,18 @@ struct nand_pos {
};
/**
+ * enum nand_page_io_req_type - Direction of an I/O request
+ * @NAND_PAGE_READ: from the chip, to the controller
+ * @NAND_PAGE_WRITE: from the controller, to the chip
+ */
+enum nand_page_io_req_type {
+ NAND_PAGE_READ = 0,
+ NAND_PAGE_WRITE,
+};
+
+/**
* struct nand_page_io_req - NAND I/O request object
+ * @type: the type of page I/O: read or write
* @pos: the position this I/O request is targeting
* @dataoffs: the offset within the page
* @datalen: number of data bytes to read from/write to this page
@@ -99,6 +110,7 @@ struct nand_pos {
* specific commands/operations.
*/
struct nand_page_io_req {
+ enum nand_page_io_req_type type;
struct nand_pos pos;
unsigned int dataoffs;
unsigned int datalen;
@@ -115,18 +127,77 @@ struct nand_page_io_req {
int mode;
};
+const struct mtd_ooblayout_ops *nand_get_small_page_ooblayout(void);
+const struct mtd_ooblayout_ops *nand_get_large_page_ooblayout(void);
+const struct mtd_ooblayout_ops *nand_get_large_page_hamming_ooblayout(void);
+
+/**
+ * enum nand_ecc_engine_type - NAND ECC engine type
+ * @NAND_ECC_ENGINE_TYPE_INVALID: Invalid value
+ * @NAND_ECC_ENGINE_TYPE_NONE: No ECC correction
+ * @NAND_ECC_ENGINE_TYPE_SOFT: Software ECC correction
+ * @NAND_ECC_ENGINE_TYPE_ON_HOST: On host hardware ECC correction
+ * @NAND_ECC_ENGINE_TYPE_ON_DIE: On chip hardware ECC correction
+ */
+enum nand_ecc_engine_type {
+ NAND_ECC_ENGINE_TYPE_INVALID,
+ NAND_ECC_ENGINE_TYPE_NONE,
+ NAND_ECC_ENGINE_TYPE_SOFT,
+ NAND_ECC_ENGINE_TYPE_ON_HOST,
+ NAND_ECC_ENGINE_TYPE_ON_DIE,
+};
+
+/**
+ * enum nand_ecc_placement - NAND ECC bytes placement
+ * @NAND_ECC_PLACEMENT_UNKNOWN: The actual position of the ECC bytes is unknown
+ * @NAND_ECC_PLACEMENT_OOB: The ECC bytes are located in the OOB area
+ * @NAND_ECC_PLACEMENT_INTERLEAVED: Syndrome layout, there are ECC bytes
+ * interleaved with regular data in the main
+ * area
+ */
+enum nand_ecc_placement {
+ NAND_ECC_PLACEMENT_UNKNOWN,
+ NAND_ECC_PLACEMENT_OOB,
+ NAND_ECC_PLACEMENT_INTERLEAVED,
+};
+
+/**
+ * enum nand_ecc_algo - NAND ECC algorithm
+ * @NAND_ECC_ALGO_UNKNOWN: Unknown algorithm
+ * @NAND_ECC_ALGO_HAMMING: Hamming algorithm
+ * @NAND_ECC_ALGO_BCH: Bose-Chaudhuri-Hocquenghem algorithm
+ * @NAND_ECC_ALGO_RS: Reed-Solomon algorithm
+ */
+enum nand_ecc_algo {
+ NAND_ECC_ALGO_UNKNOWN,
+ NAND_ECC_ALGO_HAMMING,
+ NAND_ECC_ALGO_BCH,
+ NAND_ECC_ALGO_RS,
+};
+
/**
* struct nand_ecc_props - NAND ECC properties
+ * @engine_type: ECC engine type
+ * @placement: OOB placement (if relevant)
+ * @algo: ECC algorithm (if relevant)
* @strength: ECC strength
* @step_size: Number of bytes per step
+ * @flags: Misc properties
*/
struct nand_ecc_props {
+ enum nand_ecc_engine_type engine_type;
+ enum nand_ecc_placement placement;
+ enum nand_ecc_algo algo;
unsigned int strength;
unsigned int step_size;
+ unsigned int flags;
};
#define NAND_ECCREQ(str, stp) { .strength = (str), .step_size = (stp) }
+/* NAND ECC misc flags */
+#define NAND_ECC_MAXIMIZE_STRENGTH BIT(0)
+
/**
* struct nand_bbt - bad block table object
* @cache: in memory BBT cache
@@ -158,10 +229,79 @@ struct nand_ops {
};
/**
+ * struct nand_ecc_context - Context for the ECC engine
+ * @conf: basic ECC engine parameters
+ * @total: total number of bytes used for storing ECC codes, this is used by
+ * generic OOB layouts
+ * @priv: ECC engine driver private data
+ */
+struct nand_ecc_context {
+ struct nand_ecc_props conf;
+ unsigned int total;
+ void *priv;
+};
+
+/**
+ * struct nand_ecc_engine_ops - ECC engine operations
+ * @init_ctx: given a desired user configuration for the pointed NAND device,
+ * requests the ECC engine driver to setup a configuration with
+ * values it supports.
+ * @cleanup_ctx: clean the context initialized by @init_ctx.
+ * @prepare_io_req: is called before reading/writing a page to prepare the I/O
+ * request to be performed with ECC correction.
+ * @finish_io_req: is called after reading/writing a page to terminate the I/O
+ * request and ensure proper ECC correction.
+ */
+struct nand_ecc_engine_ops {
+ int (*init_ctx)(struct nand_device *nand);
+ void (*cleanup_ctx)(struct nand_device *nand);
+ int (*prepare_io_req)(struct nand_device *nand,
+ struct nand_page_io_req *req);
+ int (*finish_io_req)(struct nand_device *nand,
+ struct nand_page_io_req *req);
+};
+
+/**
+ * struct nand_ecc_engine - ECC engine abstraction for NAND devices
+ * @ops: ECC engine operations
+ */
+struct nand_ecc_engine {
+ struct nand_ecc_engine_ops *ops;
+};
+
+void of_get_nand_ecc_user_config(struct nand_device *nand);
+int nand_ecc_init_ctx(struct nand_device *nand);
+void nand_ecc_cleanup_ctx(struct nand_device *nand);
+int nand_ecc_prepare_io_req(struct nand_device *nand,
+ struct nand_page_io_req *req);
+int nand_ecc_finish_io_req(struct nand_device *nand,
+ struct nand_page_io_req *req);
+bool nand_ecc_is_strong_enough(struct nand_device *nand);
+
+/**
+ * struct nand_ecc - Information relative to the ECC
+ * @defaults: Default values, depend on the underlying subsystem
+ * @requirements: ECC requirements from the NAND chip perspective
+ * @user_conf: User desires in terms of ECC parameters
+ * @ctx: ECC context for the ECC engine, derived from the device @requirements
+ * the @user_conf and the @defaults
+ * @ondie_engine: On-die ECC engine reference, if any
+ * @engine: ECC engine actually bound
+ */
+struct nand_ecc {
+ struct nand_ecc_props defaults;
+ struct nand_ecc_props requirements;
+ struct nand_ecc_props user_conf;
+ struct nand_ecc_context ctx;
+ struct nand_ecc_engine *ondie_engine;
+ struct nand_ecc_engine *engine;
+};
+
+/**
* struct nand_device - NAND device
* @mtd: MTD instance attached to the NAND device
* @memorg: memory layout
- * @eccreq: ECC requirements
+ * @ecc: NAND ECC object attached to the NAND device
* @rowconv: position to row address converter
* @bbt: bad block table info
* @ops: NAND operations attached to the NAND device
@@ -169,8 +309,8 @@ struct nand_ops {
* Generic NAND object. Specialized NAND layers (raw NAND, SPI NAND, OneNAND)
* should declare their own NAND object embedding a nand_device struct (that's
* how inheritance is done).
- * struct_nand_device->memorg and struct_nand_device->eccreq should be filled
- * at device detection time to reflect the NAND device
+ * struct_nand_device->memorg and struct_nand_device->ecc.requirements should
+ * be filled at device detection time to reflect the NAND device
* capabilities/requirements. Once this is done nanddev_init() can be called.
* It will take care of converting NAND information into MTD ones, which means
* the specialized NAND layers should never manually tweak
@@ -179,7 +319,7 @@ struct nand_ops {
struct nand_device {
struct mtd_info mtd;
struct nand_memory_organization memorg;
- struct nand_ecc_props eccreq;
+ struct nand_ecc ecc;
struct nand_row_converter rowconv;
struct nand_bbt bbt;
const struct nand_ops *ops;
@@ -383,6 +523,40 @@ nanddev_get_memorg(struct nand_device *nand)
return &nand->memorg;
}
+/**
+ * nanddev_get_ecc_conf() - Extract the ECC configuration from a NAND device
+ * @nand: NAND device
+ */
+static inline const struct nand_ecc_props *
+nanddev_get_ecc_conf(struct nand_device *nand)
+{
+ return &nand->ecc.ctx.conf;
+}
+
+/**
+ * nanddev_get_ecc_requirements() - Extract the ECC requirements from a NAND
+ * device
+ * @nand: NAND device
+ */
+static inline const struct nand_ecc_props *
+nanddev_get_ecc_requirements(struct nand_device *nand)
+{
+ return &nand->ecc.requirements;
+}
+
+/**
+ * nanddev_set_ecc_requirements() - Assign the ECC requirements of a NAND
+ * device
+ * @nand: NAND device
+ * @reqs: Requirements
+ */
+static inline void
+nanddev_set_ecc_requirements(struct nand_device *nand,
+ const struct nand_ecc_props *reqs)
+{
+ nand->ecc.requirements = *reqs;
+}
+
int nanddev_init(struct nand_device *nand, const struct nand_ops *ops,
struct module *owner);
void nanddev_cleanup(struct nand_device *nand);
@@ -624,11 +798,13 @@ static inline void nanddev_pos_next_page(struct nand_device *nand,
* layer.
*/
static inline void nanddev_io_iter_init(struct nand_device *nand,
+ enum nand_page_io_req_type reqtype,
loff_t offs, struct mtd_oob_ops *req,
struct nand_io_iter *iter)
{
struct mtd_info *mtd = nanddev_to_mtd(nand);
+ iter->req.type = reqtype;
iter->req.mode = req->mode;
iter->req.dataoffs = nanddev_offs_to_pos(nand, offs, &iter->req.pos);
iter->req.ooboffs = req->ooboffs;
@@ -698,8 +874,8 @@ static inline bool nanddev_io_iter_end(struct nand_device *nand,
*
* Should be used for iterate over pages that are contained in an MTD request.
*/
-#define nanddev_io_for_each_page(nand, start, req, iter) \
- for (nanddev_io_iter_init(nand, start, req, iter); \
+#define nanddev_io_for_each_page(nand, type, start, req, iter) \
+ for (nanddev_io_iter_init(nand, type, start, req, iter); \
!nanddev_io_iter_end(nand, iter); \
nanddev_io_iter_next_page(nand, iter))
diff --git a/include/linux/mtd/pfow.h b/include/linux/mtd/pfow.h
index 6166e7c60869..146413d4bdb7 100644
--- a/include/linux/mtd/pfow.h
+++ b/include/linux/mtd/pfow.h
@@ -121,37 +121,4 @@ static inline void send_pfow_command(struct map_info *map,
map_write(map, CMD(LPDDR_START_EXECUTION),
map->pfow_base + PFOW_COMMAND_EXECUTE);
}
-
-static inline void print_drs_error(unsigned dsr)
-{
- int prog_status = (dsr & DSR_RPS) >> 8;
-
- if (!(dsr & DSR_AVAILABLE))
- printk(KERN_NOTICE"DSR.15: (0) Device not Available\n");
- if (prog_status & 0x03)
- printk(KERN_NOTICE"DSR.9,8: (11) Attempt to program invalid "
- "half with 41h command\n");
- else if (prog_status & 0x02)
- printk(KERN_NOTICE"DSR.9,8: (10) Object Mode Program attempt "
- "in region with Control Mode data\n");
- else if (prog_status & 0x01)
- printk(KERN_NOTICE"DSR.9,8: (01) Program attempt in region "
- "with Object Mode data\n");
- if (!(dsr & DSR_READY_STATUS))
- printk(KERN_NOTICE"DSR.7: (0) Device is Busy\n");
- if (dsr & DSR_ESS)
- printk(KERN_NOTICE"DSR.6: (1) Erase Suspended\n");
- if (dsr & DSR_ERASE_STATUS)
- printk(KERN_NOTICE"DSR.5: (1) Erase/Blank check error\n");
- if (dsr & DSR_PROGRAM_STATUS)
- printk(KERN_NOTICE"DSR.4: (1) Program Error\n");
- if (dsr & DSR_VPPS)
- printk(KERN_NOTICE"DSR.3: (1) Vpp low detect, operation "
- "aborted\n");
- if (dsr & DSR_PSS)
- printk(KERN_NOTICE"DSR.2: (1) Program suspended\n");
- if (dsr & DSR_DPS)
- printk(KERN_NOTICE"DSR.1: (1) Aborted Erase/Program attempt "
- "on locked block\n");
-}
#endif /* __LINUX_MTD_PFOW_H */
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index a725b620aca2..aac07940de09 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -14,6 +14,7 @@
#define __LINUX_MTD_RAWNAND_H
#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
#include <linux/mtd/flashchip.h>
#include <linux/mtd/bbm.h>
#include <linux/mtd/jedec.h>
@@ -81,25 +82,6 @@ struct nand_chip;
#define NAND_DATA_IFACE_CHECK_ONLY -1
/*
- * Constants for ECC_MODES
- */
-enum nand_ecc_mode {
- NAND_ECC_INVALID,
- NAND_ECC_NONE,
- NAND_ECC_SOFT,
- NAND_ECC_HW,
- NAND_ECC_HW_SYNDROME,
- NAND_ECC_ON_DIE,
-};
-
-enum nand_ecc_algo {
- NAND_ECC_UNKNOWN,
- NAND_ECC_HAMMING,
- NAND_ECC_BCH,
- NAND_ECC_RS,
-};
-
-/*
* Constants for Hardware ECC
*/
/* Reset Hardware ECC for read */
@@ -116,7 +98,6 @@ enum nand_ecc_algo {
* pages and you want to rely on the default implementation.
*/
#define NAND_ECC_GENERIC_ERASED_CHECK BIT(0)
-#define NAND_ECC_MAXIMIZE BIT(1)
/*
* Option constants for bizarre disfunctionality and real
@@ -310,7 +291,8 @@ static const struct nand_ecc_caps __name = { \
/**
* struct nand_ecc_ctrl - Control structure for ECC
- * @mode: ECC mode
+ * @engine_type: ECC engine type
+ * @placement: OOB bytes placement
* @algo: ECC algorithm
* @steps: number of ECC steps per page
* @size: data bytes per ECC step
@@ -338,7 +320,7 @@ static const struct nand_ecc_caps __name = { \
* controller and always return contiguous in-band and
* out-of-band data even if they're not stored
* contiguously on the NAND chip (e.g.
- * NAND_ECC_HW_SYNDROME interleaves in-band and
+ * NAND_ECC_PLACEMENT_INTERLEAVED interleaves in-band and
* out-of-band data).
* @write_page_raw: function to write a raw page without ECC. This function
* should hide the specific layout used by the ECC
@@ -346,7 +328,7 @@ static const struct nand_ecc_caps __name = { \
* in-band and out-of-band data. ECC controller is
* responsible for doing the appropriate transformations
* to adapt to its specific layout (e.g.
- * NAND_ECC_HW_SYNDROME interleaves in-band and
+ * NAND_ECC_PLACEMENT_INTERLEAVED interleaves in-band and
* out-of-band data).
* @read_page: function to read a page according to the ECC generator
* requirements; returns maximum number of bitflips corrected in
@@ -362,7 +344,8 @@ static const struct nand_ecc_caps __name = { \
* @write_oob: function to write chip OOB data
*/
struct nand_ecc_ctrl {
- enum nand_ecc_mode mode;
+ enum nand_ecc_engine_type engine_type;
+ enum nand_ecc_placement placement;
enum nand_ecc_algo algo;
int steps;
int size;
@@ -1161,9 +1144,6 @@ struct nand_chip {
void *priv;
};
-extern const struct mtd_ooblayout_ops nand_ooblayout_sp_ops;
-extern const struct mtd_ooblayout_ops nand_ooblayout_lp_ops;
-
static inline struct nand_chip *mtd_to_nand(struct mtd_info *mtd)
{
return container_of(mtd, struct nand_chip, base.mtd);
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index b8360be141da..9dc7eeac924f 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -551,13 +551,13 @@ enum {
NFSPROC4_CLNT_LOOKUPP,
NFSPROC4_CLNT_LAYOUTERROR,
-
NFSPROC4_CLNT_COPY_NOTIFY,
NFSPROC4_CLNT_GETXATTR,
NFSPROC4_CLNT_SETXATTR,
NFSPROC4_CLNT_LISTXATTRS,
NFSPROC4_CLNT_REMOVEXATTR,
+ NFSPROC4_CLNT_READ_PLUS,
};
/* nfs41 types */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 7eae72a8762e..38e60ec742df 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -287,5 +287,6 @@ struct nfs_server {
#define NFS_CAP_LAYOUTERROR (1U << 26)
#define NFS_CAP_COPY_NOTIFY (1U << 27)
#define NFS_CAP_XATTR (1U << 28)
+#define NFS_CAP_READ_PLUS (1U << 29)
#endif
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 69cb46f7b8d2..d63cb862d58e 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -525,7 +525,7 @@ struct nfs_closeargs {
struct nfs_seqid * seqid;
fmode_t fmode;
u32 share_access;
- const u32 * bitmask;
+ u32 * bitmask;
struct nfs4_layoutreturn_args *lr_args;
};
@@ -608,7 +608,7 @@ struct nfs4_delegreturnargs {
struct nfs4_sequence_args seq_args;
const struct nfs_fh *fhandle;
const nfs4_stateid *stateid;
- const u32 * bitmask;
+ u32 * bitmask;
struct nfs4_layoutreturn_args *lr_args;
};
@@ -648,7 +648,7 @@ struct nfs_pgio_args {
union {
unsigned int replen; /* used by read */
struct {
- const u32 * bitmask; /* used by write */
+ u32 * bitmask; /* used by write */
enum nfs3_stable_how stable; /* used by write */
};
};
@@ -657,7 +657,7 @@ struct nfs_pgio_args {
struct nfs_pgio_res {
struct nfs4_sequence_res seq_res;
struct nfs_fattr * fattr;
- __u32 count;
+ __u64 count;
__u32 op_status;
union {
struct {
diff --git a/include/linux/node.h b/include/linux/node.h
index 014ba3ab2efd..8e5a29897936 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -99,15 +99,14 @@ extern struct node *node_devices[];
typedef void (*node_registration_func_t)(struct node *);
#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_NUMA)
-int link_mem_sections(int nid, unsigned long start_pfn,
- unsigned long end_pfn,
- enum meminit_context context);
+void link_mem_sections(int nid, unsigned long start_pfn,
+ unsigned long end_pfn,
+ enum meminit_context context);
#else
-static inline int link_mem_sections(int nid, unsigned long start_pfn,
- unsigned long end_pfn,
- enum meminit_context context)
+static inline void link_mem_sections(int nid, unsigned long start_pfn,
+ unsigned long end_pfn,
+ enum meminit_context context)
{
- return 0;
}
#endif
@@ -130,8 +129,7 @@ static inline int register_one_node(int nid)
if (error)
return error;
/* link memory sections under this node */
- error = link_mem_sections(nid, start_pfn, end_pfn,
- MEMINIT_EARLY);
+ link_mem_sections(nid, start_pfn, end_pfn, MEMINIT_EARLY);
}
return error;
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 3334ce056335..ac398e143c9a 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -90,9 +90,9 @@
* for such situations. See below and CPUMASK_ALLOC also.
*/
-#include <linux/kernel.h>
#include <linux/threads.h>
#include <linux/bitmap.h>
+#include <linux/minmax.h>
#include <linux/numa.h>
typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t;
diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index f1c4e7b56bd9..ef74051d5cfe 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -3,6 +3,7 @@
#define __LINUX_OVERFLOW_H
#include <linux/compiler.h>
+#include <linux/limits.h>
/*
* In the fallback code below, we need to compute the minimum and
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 38ded408bd4c..4f6ba9379112 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -431,13 +431,9 @@ PAGEFLAG_FALSE(Uncached)
PAGEFLAG(HWPoison, hwpoison, PF_ANY)
TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
#define __PG_HWPOISON (1UL << PG_hwpoison)
-extern bool set_hwpoison_free_buddy_page(struct page *page);
+extern bool take_page_off_buddy(struct page *page);
#else
PAGEFLAG_FALSE(HWPoison)
-static inline bool set_hwpoison_free_buddy_page(struct page *page)
-{
- return 0;
-}
#define __PG_HWPOISON 0
#endif
diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h
index 8679ccd722e8..3468794f83d2 100644
--- a/include/linux/page_owner.h
+++ b/include/linux/page_owner.h
@@ -11,7 +11,7 @@ extern struct page_ext_operations page_owner_ops;
extern void __reset_page_owner(struct page *page, unsigned int order);
extern void __set_page_owner(struct page *page,
unsigned int order, gfp_t gfp_mask);
-extern void __split_page_owner(struct page *page, unsigned int order);
+extern void __split_page_owner(struct page *page, unsigned int nr);
extern void __copy_page_owner(struct page *oldpage, struct page *newpage);
extern void __set_page_owner_migrate_reason(struct page *page, int reason);
extern void __dump_page_owner(struct page *page);
@@ -31,10 +31,10 @@ static inline void set_page_owner(struct page *page,
__set_page_owner(page, order, gfp_mask);
}
-static inline void split_page_owner(struct page *page, unsigned int order)
+static inline void split_page_owner(struct page *page, unsigned int nr)
{
if (static_branch_unlikely(&page_owner_inited))
- __split_page_owner(page, order);
+ __split_page_owner(page, nr);
}
static inline void copy_page_owner(struct page *oldpage, struct page *newpage)
{
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index c3afd3242b54..c77b7c31b2e4 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -29,6 +29,7 @@ enum mapping_flags {
AS_EXITING = 4, /* final truncate in progress */
/* writeback related tags are not used */
AS_NO_WRITEBACK_TAGS = 5,
+ AS_THP_SUPPORT = 6, /* THPs supported */
};
/**
@@ -120,6 +121,40 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
m->gfp_mask = mask;
}
+static inline bool mapping_thp_support(struct address_space *mapping)
+{
+ return test_bit(AS_THP_SUPPORT, &mapping->flags);
+}
+
+static inline int filemap_nr_thps(struct address_space *mapping)
+{
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+ return atomic_read(&mapping->nr_thps);
+#else
+ return 0;
+#endif
+}
+
+static inline void filemap_nr_thps_inc(struct address_space *mapping)
+{
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+ if (!mapping_thp_support(mapping))
+ atomic_inc(&mapping->nr_thps);
+#else
+ WARN_ON_ONCE(1);
+#endif
+}
+
+static inline void filemap_nr_thps_dec(struct address_space *mapping)
+{
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+ if (!mapping_thp_support(mapping))
+ atomic_dec(&mapping->nr_thps);
+#else
+ WARN_ON_ONCE(1);
+#endif
+}
+
void release_pages(struct page **pages, int nr);
/*
@@ -726,17 +761,6 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask);
void delete_from_page_cache_batch(struct address_space *mapping,
struct pagevec *pvec);
-#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE)
-
-void page_cache_sync_readahead(struct address_space *, struct file_ra_state *,
- struct file *, pgoff_t index, unsigned long req_count);
-void page_cache_async_readahead(struct address_space *, struct file_ra_state *,
- struct file *, struct page *, pgoff_t index,
- unsigned long req_count);
-void page_cache_readahead_unbounded(struct address_space *, struct file *,
- pgoff_t index, unsigned long nr_to_read,
- unsigned long lookahead_count);
-
/*
* Like add_to_page_cache_locked, but used to add newly allocated pages:
* the page is new, so we can just run __SetPageLocked() against it.
@@ -777,6 +801,67 @@ struct readahead_control {
unsigned int _batch_count;
};
+#define DEFINE_READAHEAD(rac, f, m, i) \
+ struct readahead_control rac = { \
+ .file = f, \
+ .mapping = m, \
+ ._index = i, \
+ }
+
+#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE)
+
+void page_cache_ra_unbounded(struct readahead_control *,
+ unsigned long nr_to_read, unsigned long lookahead_count);
+void page_cache_sync_ra(struct readahead_control *, struct file_ra_state *,
+ unsigned long req_count);
+void page_cache_async_ra(struct readahead_control *, struct file_ra_state *,
+ struct page *, unsigned long req_count);
+
+/**
+ * page_cache_sync_readahead - generic file readahead
+ * @mapping: address_space which holds the pagecache and I/O vectors
+ * @ra: file_ra_state which holds the readahead state
+ * @file: Used by the filesystem for authentication.
+ * @index: Index of first page to be read.
+ * @req_count: Total number of pages being read by the caller.
+ *
+ * page_cache_sync_readahead() should be called when a cache miss happened:
+ * it will submit the read. The readahead logic may decide to piggyback more
+ * pages onto the read request if access patterns suggest it will improve
+ * performance.
+ */
+static inline
+void page_cache_sync_readahead(struct address_space *mapping,
+ struct file_ra_state *ra, struct file *file, pgoff_t index,
+ unsigned long req_count)
+{
+ DEFINE_READAHEAD(ractl, file, mapping, index);
+ page_cache_sync_ra(&ractl, ra, req_count);
+}
+
+/**
+ * page_cache_async_readahead - file readahead for marked pages
+ * @mapping: address_space which holds the pagecache and I/O vectors
+ * @ra: file_ra_state which holds the readahead state
+ * @file: Used by the filesystem for authentication.
+ * @page: The page at @index which triggered the readahead call.
+ * @index: Index of first page to be read.
+ * @req_count: Total number of pages being read by the caller.
+ *
+ * page_cache_async_readahead() should be called when a page is used which
+ * is marked as PageReadahead; this is a marker to suggest that the application
+ * has used up enough of the readahead window that we should start pulling in
+ * more pages.
+ */
+static inline
+void page_cache_async_readahead(struct address_space *mapping,
+ struct file_ra_state *ra, struct file *file,
+ struct page *page, pgoff_t index, unsigned long req_count)
+{
+ DEFINE_READAHEAD(ractl, file, mapping, index);
+ page_cache_async_ra(&ractl, ra, page, req_count);
+}
+
/**
* readahead_page - Get the next page to read.
* @rac: The current readahead request.
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 176d6cf80e7c..fa10acb8d6a4 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -77,6 +77,7 @@ extern const struct file_operations pidfd_fops;
struct file;
extern struct pid *pidfd_pid(const struct file *file);
+struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags);
static inline struct pid *get_pid(struct pid *pid)
{
diff --git a/include/linux/platform_data/mtd-davinci.h b/include/linux/platform_data/mtd-davinci.h
index 03e92c71b3fa..dd474dd44848 100644
--- a/include/linux/platform_data/mtd-davinci.h
+++ b/include/linux/platform_data/mtd-davinci.h
@@ -60,15 +60,16 @@ struct davinci_nand_pdata { /* platform_data */
struct mtd_partition *parts;
unsigned nr_parts;
- /* none == NAND_ECC_NONE (strongly *not* advised!!)
- * soft == NAND_ECC_SOFT
- * else == NAND_ECC_HW, according to ecc_bits
+ /* none == NAND_ECC_ENGINE_TYPE_NONE (strongly *not* advised!!)
+ * soft == NAND_ECC_ENGINE_TYPE_SOFT
+ * else == NAND_ECC_ENGINE_TYPE_ON_HOST, according to ecc_bits
*
* All DaVinci-family chips support 1-bit hardware ECC.
* Newer ones also support 4-bit ECC, but are awkward
* using it with large page chips.
*/
- enum nand_ecc_mode ecc_mode;
+ enum nand_ecc_engine_type engine_type;
+ enum nand_ecc_placement ecc_placement;
u8 ecc_bits;
/* e.g. NAND_BUSWIDTH_16 */
diff --git a/include/linux/platform_data/mtd-nand-s3c2410.h b/include/linux/platform_data/mtd-nand-s3c2410.h
index 08675b16f9e1..25390fc3e795 100644
--- a/include/linux/platform_data/mtd-nand-s3c2410.h
+++ b/include/linux/platform_data/mtd-nand-s3c2410.h
@@ -49,7 +49,7 @@ struct s3c2410_platform_nand {
unsigned int ignore_unset_ecc:1;
- enum nand_ecc_mode ecc_mode;
+ enum nand_ecc_engine_type engine_type;
int nr_sets;
struct s3c2410_nand_set *sets;
diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h
index 987d9652aa4e..111a40d0d3d5 100644
--- a/include/linux/power/bq27xxx_battery.h
+++ b/include/linux/power/bq27xxx_battery.h
@@ -32,6 +32,7 @@ enum bq27xxx_chip {
BQ27621,
BQ27Z561,
BQ28Z610,
+ BQ34Z100,
};
struct bq27xxx_device_info;
diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h
index ae94dcebd936..45e228b353ea 100644
--- a/include/linux/power/charger-manager.h
+++ b/include/linux/power/charger-manager.h
@@ -31,22 +31,16 @@ enum polling_modes {
CM_POLL_CHARGING_ONLY,
};
-enum cm_event_types {
- CM_EVENT_UNKNOWN = 0,
- CM_EVENT_BATT_FULL,
- CM_EVENT_BATT_IN,
- CM_EVENT_BATT_OUT,
- CM_EVENT_BATT_OVERHEAT,
- CM_EVENT_BATT_COLD,
- CM_EVENT_EXT_PWR_IN_OUT,
- CM_EVENT_CHG_START_STOP,
- CM_EVENT_OTHERS,
+enum cm_batt_temp {
+ CM_BATT_OK = 0,
+ CM_BATT_OVERHEAT,
+ CM_BATT_COLD,
};
/**
* struct charger_cable
* @extcon_name: the name of extcon device.
- * @name: the name of charger cable(external connector).
+ * @name: the name of the cable connector
* @extcon_dev: the extcon device.
* @wq: the workqueue to control charger according to the state of
* charger cable. If charger cable is attached, enable charger.
@@ -62,9 +56,10 @@ enum cm_event_types {
struct charger_cable {
const char *extcon_name;
const char *name;
+ struct extcon_dev *extcon_dev;
+ u64 extcon_type;
/* The charger-manager use Extcon framework */
- struct extcon_specific_cable_nb extcon_dev;
struct work_struct wq;
struct notifier_block nb;
@@ -131,11 +126,10 @@ struct charger_regulator {
* @psy_name: the name of power-supply-class for charger manager
* @polling_mode:
* Determine which polling mode will be used
- * @fullbatt_vchkdrop_ms:
* @fullbatt_vchkdrop_uV:
* Check voltage drop after the battery is fully charged.
- * If it has dropped more than fullbatt_vchkdrop_uV after
- * fullbatt_vchkdrop_ms, CM will restart charging.
+ * If it has dropped more than fullbatt_vchkdrop_uV
+ * CM will restart charging.
* @fullbatt_uV: voltage in microvolt
* If VBATT >= fullbatt_uV, it is assumed to be full.
* @fullbatt_soc: state of Charge in %
@@ -172,7 +166,6 @@ struct charger_desc {
enum polling_modes polling_mode;
unsigned int polling_interval_ms;
- unsigned int fullbatt_vchkdrop_ms;
unsigned int fullbatt_vchkdrop_uV;
unsigned int fullbatt_uV;
unsigned int fullbatt_soc;
@@ -211,9 +204,6 @@ struct charger_desc {
* @charger_stat: array of power_supply for chargers
* @tzd_batt : thermal zone device for battery
* @charger_enabled: the state of charger
- * @fullbatt_vchk_jiffies_at:
- * jiffies at the time full battery check will occur.
- * @fullbatt_vchk_work: work queue for full battery check
* @emergency_stop:
* When setting true, stop charging
* @psy_name_buf: the name of power-supply-class for charger manager
@@ -224,6 +214,7 @@ struct charger_desc {
* saved status of battery before entering suspend-to-RAM
* @charging_start_time: saved start time of enabling charging
* @charging_end_time: saved end time of disabling charging
+ * @battery_status: Current battery status
*/
struct charger_manager {
struct list_head entry;
@@ -235,9 +226,6 @@ struct charger_manager {
#endif
bool charger_enabled;
- unsigned long fullbatt_vchk_jiffies_at;
- struct delayed_work fullbatt_vchk_work;
-
int emergency_stop;
char psy_name_buf[PSY_NAME_MAX + 1];
@@ -246,13 +234,8 @@ struct charger_manager {
u64 charging_start_time;
u64 charging_end_time;
+
+ int battery_status;
};
-#if IS_ENABLED(CONFIG_CHARGER_MANAGER)
-extern void cm_notify_event(struct power_supply *psy,
- enum cm_event_types type, char *msg);
-#else
-static inline void cm_notify_event(struct power_supply *psy,
- enum cm_event_types type, char *msg) { }
-#endif
#endif /* _CHARGER_MANAGER_H */
diff --git a/include/linux/power/gpio-charger.h b/include/linux/power/gpio-charger.h
index 5a5a8de98181..c0b7657ac1df 100644
--- a/include/linux/power/gpio-charger.h
+++ b/include/linux/power/gpio-charger.h
@@ -13,18 +13,12 @@
* struct gpio_charger_platform_data - platform_data for gpio_charger devices
* @name: Name for the chargers power_supply device
* @type: Type of the charger
- * @gpio: GPIO which is used to indicate the chargers status
- * @gpio_active_low: Should be set to 1 if the GPIO is active low otherwise 0
* @supplied_to: Array of battery names to which this chargers supplies power
* @num_supplicants: Number of entries in the supplied_to array
*/
struct gpio_charger_platform_data {
const char *name;
enum power_supply_type type;
-
- int gpio;
- int gpio_active_low;
-
char **supplied_to;
size_t num_supplicants;
};
diff --git a/include/linux/power/smb347-charger.h b/include/linux/power/smb347-charger.h
deleted file mode 100644
index e0b687a4d20c..000000000000
--- a/include/linux/power/smb347-charger.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Summit Microelectronics SMB347 Battery Charger Driver
- *
- * Copyright (C) 2011, Intel Corporation
- *
- * Authors: Bruce E. Robertson <bruce.e.robertson@intel.com>
- * Mika Westerberg <mika.westerberg@linux.intel.com>
- */
-
-#ifndef SMB347_CHARGER_H
-#define SMB347_CHARGER_H
-
-#include <linux/types.h>
-#include <linux/power_supply.h>
-
-enum {
- /* use the default compensation method */
- SMB347_SOFT_TEMP_COMPENSATE_DEFAULT = -1,
-
- SMB347_SOFT_TEMP_COMPENSATE_NONE,
- SMB347_SOFT_TEMP_COMPENSATE_CURRENT,
- SMB347_SOFT_TEMP_COMPENSATE_VOLTAGE,
-};
-
-/* Use default factory programmed value for hard/soft temperature limit */
-#define SMB347_TEMP_USE_DEFAULT -273
-
-/*
- * Charging enable can be controlled by software (via i2c) by
- * smb347-charger driver or by EN pin (active low/high).
- */
-enum smb347_chg_enable {
- SMB347_CHG_ENABLE_SW,
- SMB347_CHG_ENABLE_PIN_ACTIVE_LOW,
- SMB347_CHG_ENABLE_PIN_ACTIVE_HIGH,
-};
-
-/**
- * struct smb347_charger_platform_data - platform data for SMB347 charger
- * @battery_info: Information about the battery
- * @max_charge_current: maximum current (in uA) the battery can be charged
- * @max_charge_voltage: maximum voltage (in uV) the battery can be charged
- * @pre_charge_current: current (in uA) to use in pre-charging phase
- * @termination_current: current (in uA) used to determine when the
- * charging cycle terminates
- * @pre_to_fast_voltage: voltage (in uV) treshold used for transitioning to
- * pre-charge to fast charge mode
- * @mains_current_limit: maximum input current drawn from AC/DC input (in uA)
- * @usb_hc_current_limit: maximum input high current (in uA) drawn from USB
- * input
- * @chip_temp_threshold: die temperature where device starts limiting charge
- * current [%100 - %130] (in degree C)
- * @soft_cold_temp_limit: soft cold temperature limit [%0 - %15] (in degree C),
- * granularity is 5 deg C.
- * @soft_hot_temp_limit: soft hot temperature limit [%40 - %55] (in degree C),
- * granularity is 5 deg C.
- * @hard_cold_temp_limit: hard cold temperature limit [%-5 - %10] (in degree C),
- * granularity is 5 deg C.
- * @hard_hot_temp_limit: hard hot temperature limit [%50 - %65] (in degree C),
- * granularity is 5 deg C.
- * @suspend_on_hard_temp_limit: suspend charging when hard limit is hit
- * @soft_temp_limit_compensation: compensation method when soft temperature
- * limit is hit
- * @charge_current_compensation: current (in uA) for charging compensation
- * current when temperature hits soft limits
- * @use_mains: AC/DC input can be used
- * @use_usb: USB input can be used
- * @use_usb_otg: USB OTG output can be used (not implemented yet)
- * @irq_gpio: GPIO number used for interrupts (%-1 if not used)
- * @enable_control: how charging enable/disable is controlled
- * (driver/pin controls)
- *
- * @use_main, @use_usb, and @use_usb_otg are means to enable/disable
- * hardware support for these. This is useful when we want to have for
- * example OTG charging controlled via OTG transceiver driver and not by
- * the SMB347 hardware.
- *
- * Hard and soft temperature limit values are given as described in the
- * device data sheet and assuming NTC beta value is %3750. Even if this is
- * not the case, these values should be used. They can be mapped to the
- * corresponding NTC beta values with the help of table %2 in the data
- * sheet. So for example if NTC beta is %3375 and we want to program hard
- * hot limit to be %53 deg C, @hard_hot_temp_limit should be set to %50.
- *
- * If zero value is given in any of the current and voltage values, the
- * factory programmed default will be used. For soft/hard temperature
- * values, pass in %SMB347_TEMP_USE_DEFAULT instead.
- */
-struct smb347_charger_platform_data {
- struct power_supply_info battery_info;
- unsigned int max_charge_current;
- unsigned int max_charge_voltage;
- unsigned int pre_charge_current;
- unsigned int termination_current;
- unsigned int pre_to_fast_voltage;
- unsigned int mains_current_limit;
- unsigned int usb_hc_current_limit;
- unsigned int chip_temp_threshold;
- int soft_cold_temp_limit;
- int soft_hot_temp_limit;
- int hard_cold_temp_limit;
- int hard_hot_temp_limit;
- bool suspend_on_hard_temp_limit;
- unsigned int soft_temp_limit_compensation;
- unsigned int charge_current_compensation;
- bool use_mains;
- bool use_usb;
- bool use_usb_otg;
- int irq_gpio;
- enum smb347_chg_enable enable_control;
-};
-
-#endif /* SMB347_CHARGER_H */
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 97cc4b85bf61..81a55e974feb 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -186,6 +186,7 @@ enum power_supply_type {
POWER_SUPPLY_TYPE_USB_PD, /* Power Delivery Port */
POWER_SUPPLY_TYPE_USB_PD_DRP, /* PD Dual Role Port */
POWER_SUPPLY_TYPE_APPLE_BRICK_ID, /* Apple Charging Method */
+ POWER_SUPPLY_TYPE_WIRELESS, /* Wireless */
};
enum power_supply_usb_type {
@@ -365,6 +366,12 @@ struct power_supply_battery_info {
int constant_charge_voltage_max_uv; /* microVolts */
int factory_internal_resistance_uohm; /* microOhms */
int ocv_temp[POWER_SUPPLY_OCV_TEMP_MAX];/* celsius */
+ int temp_ambient_alert_min; /* celsius */
+ int temp_ambient_alert_max; /* celsius */
+ int temp_alert_min; /* celsius */
+ int temp_alert_max; /* celsius */
+ int temp_min; /* celsius */
+ int temp_max; /* celsius */
struct power_supply_battery_ocv_table *ocv_table[POWER_SUPPLY_OCV_TEMP_MAX];
int ocv_table_size[POWER_SUPPLY_OCV_TEMP_MAX];
struct power_supply_resistance_temp_table *resist_table;
diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h
index f464d85e88a4..aeb242cefebf 100644
--- a/include/linux/qed/qed_rdma_if.h
+++ b/include/linux/qed/qed_rdma_if.h
@@ -242,10 +242,8 @@ struct qed_rdma_register_tid_in_params {
bool pbl_two_level;
u8 pbl_page_size_log;
u8 page_size_log;
- u32 fbo;
u64 length;
u64 vaddr;
- bool zbva;
bool phy_mr;
bool dma_mr;
diff --git a/include/linux/qed/qede_rdma.h b/include/linux/qed/qede_rdma.h
index 072da2f6da37..0d5564a59a59 100644
--- a/include/linux/qed/qede_rdma.h
+++ b/include/linux/qed/qede_rdma.h
@@ -20,7 +20,8 @@ enum qede_rdma_event {
QEDE_UP,
QEDE_DOWN,
QEDE_CHANGE_ADDR,
- QEDE_CLOSE
+ QEDE_CLOSE,
+ QEDE_CHANGE_MTU,
};
struct qede_rdma_event_work {
@@ -54,6 +55,7 @@ void qede_rdma_dev_event_open(struct qede_dev *dev);
void qede_rdma_dev_event_close(struct qede_dev *dev);
void qede_rdma_dev_remove(struct qede_dev *dev, bool recovery);
void qede_rdma_event_changeaddr(struct qede_dev *edr);
+void qede_rdma_event_change_mtu(struct qede_dev *edev);
#else
static inline int qede_rdma_dev_add(struct qede_dev *dev,
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index c2a9f7c90727..64ad900ac742 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -11,6 +11,7 @@
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/list.h>
+#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
@@ -376,7 +377,7 @@ radix_tree_chunk_size(struct radix_tree_iter *iter)
* radix_tree_next_slot - find next slot in chunk
*
* @slot: pointer to current slot
- * @iter: pointer to interator state
+ * @iter: pointer to iterator state
* @flags: RADIX_TREE_ITER_*, should be constant
* Returns: pointer to next slot, or NULL if there no more left
*
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 7a6fc9956510..f8633d37e358 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -63,9 +63,17 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list)
RCU_LOCKDEP_WARN(!(cond) && !rcu_read_lock_any_held(), \
"RCU-list traversed in non-reader section!"); \
})
+
+#define __list_check_srcu(cond) \
+ ({ \
+ RCU_LOCKDEP_WARN(!(cond), \
+ "RCU-list traversed without holding the required lock!");\
+ })
#else
#define __list_check_rcu(dummy, cond, extra...) \
({ check_arg_count_one(extra); })
+
+#define __list_check_srcu(cond) ({ })
#endif
/*
@@ -386,6 +394,25 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
/**
+ * list_for_each_entry_srcu - iterate over rcu list of given type
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list_head within the struct.
+ * @cond: lockdep expression for the lock required to traverse the list.
+ *
+ * This list-traversal primitive may safely run concurrently with
+ * the _rcu list-mutation primitives such as list_add_rcu()
+ * as long as the traversal is guarded by srcu_read_lock().
+ * The lockdep expression srcu_read_lock_held() can be passed as the
+ * cond argument from read side.
+ */
+#define list_for_each_entry_srcu(pos, head, member, cond) \
+ for (__list_check_srcu(cond), \
+ pos = list_entry_rcu((head)->next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
+
+/**
* list_entry_lockless - get the struct for this entry
* @ptr: the &struct list_head pointer.
* @type: the type of the struct this is embedded in.
@@ -684,6 +711,27 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n,
&(pos)->member)), typeof(*(pos)), member))
/**
+ * hlist_for_each_entry_srcu - iterate over rcu list of given type
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the hlist_node within the struct.
+ * @cond: lockdep expression for the lock required to traverse the list.
+ *
+ * This list-traversal primitive may safely run concurrently with
+ * the _rcu list-mutation primitives such as hlist_add_head_rcu()
+ * as long as the traversal is guarded by srcu_read_lock().
+ * The lockdep expression srcu_read_lock_held() can be passed as the
+ * cond argument from read side.
+ */
+#define hlist_for_each_entry_srcu(pos, head, member, cond) \
+ for (__list_check_srcu(cond), \
+ pos = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),\
+ typeof(*(pos)), member); \
+ pos; \
+ pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\
+ &(pos)->member)), typeof(*(pos)), member))
+
+/**
* hlist_for_each_entry_rcu_notrace - iterate over rcu list of given type (for tracing)
* @pos: the type * to use as a loop cursor.
* @head: the head for your list.
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index d15d46db61f7..7c1ceff02852 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -55,6 +55,12 @@ void __rcu_read_unlock(void);
#else /* #ifdef CONFIG_PREEMPT_RCU */
+#ifdef CONFIG_TINY_RCU
+#define rcu_read_unlock_strict() do { } while (0)
+#else
+void rcu_read_unlock_strict(void);
+#endif
+
static inline void __rcu_read_lock(void)
{
preempt_disable();
@@ -63,6 +69,7 @@ static inline void __rcu_read_lock(void)
static inline void __rcu_read_unlock(void)
{
preempt_enable();
+ rcu_read_unlock_strict();
}
static inline int rcu_preempt_depth(void)
@@ -709,8 +716,8 @@ static inline void rcu_read_lock_bh(void)
"rcu_read_lock_bh() used illegally while idle");
}
-/*
- * rcu_read_unlock_bh - marks the end of a softirq-only RCU critical section
+/**
+ * rcu_read_unlock_bh() - marks the end of a softirq-only RCU critical section
*
* See rcu_read_lock_bh() for more information.
*/
@@ -751,10 +758,10 @@ static inline notrace void rcu_read_lock_sched_notrace(void)
__acquire(RCU_SCHED);
}
-/*
- * rcu_read_unlock_sched - marks the end of a RCU-classic critical section
+/**
+ * rcu_read_unlock_sched() - marks the end of a RCU-classic critical section
*
- * See rcu_read_lock_sched for more information.
+ * See rcu_read_lock_sched() for more information.
*/
static inline void rcu_read_unlock_sched(void)
{
@@ -945,7 +952,7 @@ static inline void rcu_head_init(struct rcu_head *rhp)
}
/**
- * rcu_head_after_call_rcu - Has this rcu_head been passed to call_rcu()?
+ * rcu_head_after_call_rcu() - Has this rcu_head been passed to call_rcu()?
* @rhp: The rcu_head structure to test.
* @f: The function passed to call_rcu() along with @rhp.
*
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 5cc9637cac16..7c1ecdb356d8 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -103,7 +103,6 @@ static inline void rcu_scheduler_starting(void) { }
static inline void rcu_end_inkernel_boot(void) { }
static inline bool rcu_inkernel_boot_has_ended(void) { return true; }
static inline bool rcu_is_watching(void) { return true; }
-static inline bool __rcu_is_watching(void) { return true; }
static inline void rcu_momentary_dyntick_idle(void) { }
static inline void kfree_rcu_scheduler_running(void) { }
static inline bool rcu_gp_might_be_stalled(void) { return false; }
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index d2f4064ebd1d..59eb5cd567d7 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -64,7 +64,6 @@ extern int rcu_scheduler_active __read_mostly;
void rcu_end_inkernel_boot(void);
bool rcu_inkernel_boot_has_ended(void);
bool rcu_is_watching(void);
-bool __rcu_is_watching(void);
#ifndef CONFIG_PREEMPTION
void rcu_all_qs(void);
#endif
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 45cf7b69d852..36c47e7e66a2 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -165,6 +165,22 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
#define for_each_sgtable_dma_sg(sgt, sg, i) \
for_each_sg((sgt)->sgl, sg, (sgt)->nents, i)
+static inline void __sg_chain(struct scatterlist *chain_sg,
+ struct scatterlist *sgl)
+{
+ /*
+ * offset and length are unused for chain entry. Clear them.
+ */
+ chain_sg->offset = 0;
+ chain_sg->length = 0;
+
+ /*
+ * Set lowest bit to indicate a link pointer, and make sure to clear
+ * the termination bit if it happens to be set.
+ */
+ chain_sg->page_link = ((unsigned long) sgl | SG_CHAIN) & ~SG_END;
+}
+
/**
* sg_chain - Chain two sglists together
* @prv: First scatterlist
@@ -178,18 +194,7 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
struct scatterlist *sgl)
{
- /*
- * offset and length are unused for chain entry. Clear them.
- */
- prv[prv_nents - 1].offset = 0;
- prv[prv_nents - 1].length = 0;
-
- /*
- * Set lowest bit to indicate a link pointer, and make sure to clear
- * the termination bit if it happens to be set.
- */
- prv[prv_nents - 1].page_link = ((unsigned long) sgl | SG_CHAIN)
- & ~SG_END;
+ __sg_chain(&prv[prv_nents - 1], sgl);
}
/**
@@ -286,10 +291,11 @@ void sg_free_table(struct sg_table *);
int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int,
struct scatterlist *, unsigned int, gfp_t, sg_alloc_fn *);
int sg_alloc_table(struct sg_table *, unsigned int, gfp_t);
-int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
- unsigned int n_pages, unsigned int offset,
- unsigned long size, unsigned int max_segment,
- gfp_t gfp_mask);
+struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt,
+ struct page **pages, unsigned int n_pages, unsigned int offset,
+ unsigned long size, unsigned int max_segment,
+ struct scatterlist *prv, unsigned int left_pages,
+ gfp_t gfp_mask);
int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
unsigned int n_pages, unsigned int offset,
unsigned long size, gfp_t gfp_mask);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9030f3abd969..063cd120b459 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1013,7 +1013,7 @@ struct task_struct {
struct held_lock held_locks[MAX_LOCK_DEPTH];
#endif
-#ifdef CONFIG_UBSAN
+#if defined(CONFIG_UBSAN) && !defined(CONFIG_UBSAN_TRAP)
unsigned int in_ubsan;
#endif
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 15bfb06f2884..d5ece7a9a403 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -49,31 +49,6 @@ static inline void mmdrop(struct mm_struct *mm)
__mmdrop(mm);
}
-/*
- * This has to be called after a get_task_mm()/mmget_not_zero()
- * followed by taking the mmap_lock for writing before modifying the
- * vmas or anything the coredump pretends not to change from under it.
- *
- * It also has to be called when mmgrab() is used in the context of
- * the process, but then the mm_count refcount is transferred outside
- * the context of the process to run down_write() on that pinned mm.
- *
- * NOTE: find_extend_vma() called from GUP context is the only place
- * that can modify the "mm" (notably the vm_start/end) under mmap_lock
- * for reading and outside the context of the process, so it is also
- * the only case that holds the mmap_lock for reading that must call
- * this function. Generally if the mmap_lock is hold for reading
- * there's no need of this check after get_task_mm()/mmget_not_zero().
- *
- * This function can be obsoleted and the check can be removed, after
- * the coredump code will hold the mmap_lock for writing before
- * invoking the ->core_dump methods.
- */
-static inline bool mmget_still_valid(struct mm_struct *mm)
-{
- return likely(!mm->core_state);
-}
-
/**
* mmget() - Pin the address space associated with a &struct mm_struct.
* @mm: The address space to pin.
@@ -304,39 +279,38 @@ static inline void memalloc_nocma_restore(unsigned int flags)
#endif
#ifdef CONFIG_MEMCG
+DECLARE_PER_CPU(struct mem_cgroup *, int_active_memcg);
/**
- * memalloc_use_memcg - Starts the remote memcg charging scope.
+ * set_active_memcg - Starts the remote memcg charging scope.
* @memcg: memcg to charge.
*
* This function marks the beginning of the remote memcg charging scope. All the
* __GFP_ACCOUNT allocations till the end of the scope will be charged to the
* given memcg.
*
- * NOTE: This function is not nesting safe.
+ * NOTE: This function can nest. Users must save the return value and
+ * reset the previous value after their own charging scope is over.
*/
-static inline void memalloc_use_memcg(struct mem_cgroup *memcg)
+static inline struct mem_cgroup *
+set_active_memcg(struct mem_cgroup *memcg)
{
- WARN_ON_ONCE(current->active_memcg);
- current->active_memcg = memcg;
-}
+ struct mem_cgroup *old;
+
+ if (in_interrupt()) {
+ old = this_cpu_read(int_active_memcg);
+ this_cpu_write(int_active_memcg, memcg);
+ } else {
+ old = current->active_memcg;
+ current->active_memcg = memcg;
+ }
-/**
- * memalloc_unuse_memcg - Ends the remote memcg charging scope.
- *
- * This function marks the end of the remote memcg charging scope started by
- * memalloc_use_memcg().
- */
-static inline void memalloc_unuse_memcg(void)
-{
- current->active_memcg = NULL;
+ return old;
}
#else
-static inline void memalloc_use_memcg(struct mem_cgroup *memcg)
-{
-}
-
-static inline void memalloc_unuse_memcg(void)
+static inline struct mem_cgroup *
+set_active_memcg(struct mem_cgroup *memcg)
{
+ return NULL;
}
#endif
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 80d557ef8a11..9f13966d3d92 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -26,6 +26,9 @@ struct __call_single_data {
struct {
struct llist_node llist;
unsigned int flags;
+#ifdef CONFIG_64BIT
+ u16 src, dst;
+#endif
};
};
smp_call_func_t func;
diff --git a/include/linux/smp_types.h b/include/linux/smp_types.h
index 364b3ae3e41d..2e8461af8df6 100644
--- a/include/linux/smp_types.h
+++ b/include/linux/smp_types.h
@@ -61,6 +61,9 @@ struct __call_single_node {
unsigned int u_flags;
atomic_t a_flags;
};
+#ifdef CONFIG_64BIT
+ u16 src, dst;
+#endif
};
#endif /* __LINUX_SMP_TYPES_H */
diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h
index d796058cdff2..f07c334c599f 100644
--- a/include/linux/sunrpc/bc_xprt.h
+++ b/include/linux/sunrpc/bc_xprt.h
@@ -4,7 +4,7 @@
NetApp provides this source code under the GPL v2 License.
The GPL v2 license is available at
-http://opensource.org/licenses/gpl-license.php.
+https://opensource.org/licenses/gpl-license.php.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 10891b70fc7b..d0965e2997b0 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -45,7 +45,8 @@
*/
struct cache_head {
struct hlist_node cache_list;
- time64_t expiry_time; /* After time time, don't use the data */
+ time64_t expiry_time; /* After time expiry_time, don't use
+ * the data */
time64_t last_refresh; /* If CACHE_PENDING, this is when upcall was
* sent, else this is when update was
* received, though it is alway set to
diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h
index bea40d9f03a1..43f854487539 100644
--- a/include/linux/sunrpc/msg_prot.h
+++ b/include/linux/sunrpc/msg_prot.h
@@ -143,7 +143,7 @@ typedef __be32 rpc_fraghdr;
/*
* Well-known netids. See:
*
- * http://www.iana.org/assignments/rpc-netids/rpc-netids.xhtml
+ * https://www.iana.org/assignments/rpc-netids/rpc-netids.xhtml
*/
#define RPCBIND_NETID_UDP "udp"
#define RPCBIND_NETID_TCP "tcp"
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 5a6a81b7cd9f..fe7ff7f5b584 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -240,6 +240,7 @@ extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen);
extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
unsigned int base, unsigned int len);
extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr);
+extern unsigned int xdr_page_pos(const struct xdr_stream *xdr);
extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf,
__be32 *p, struct rpc_rqst *rqst);
extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
@@ -249,6 +250,8 @@ extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data);
+extern uint64_t xdr_align_data(struct xdr_stream *, uint64_t, uint32_t);
+extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t);
/**
* xdr_stream_remaining - Return the number of bytes remaining in the stream
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 06db09875aa4..2eda7678fe1d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -879,6 +879,8 @@ asmlinkage long sys_munlockall(void);
asmlinkage long sys_mincore(unsigned long start, size_t len,
unsigned char __user * vec);
asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior);
+asmlinkage long sys_process_madvise(int pidfd, const struct iovec __user *vec,
+ size_t vlen, int behavior, unsigned int flags);
asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
unsigned long prot, unsigned long pgoff,
unsigned long flags);
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 42ef807e5d84..d07ea27e72a9 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -55,6 +55,7 @@ enum thermal_notify_event {
THERMAL_DEVICE_UP, /* Thermal device is up after a down event */
THERMAL_DEVICE_POWER_CAPABILITY_CHANGED, /* power capability changed */
THERMAL_TABLE_CHANGED, /* Thermal table(s) changed */
+ THERMAL_EVENT_KEEP_ALIVE, /* Request for user space handler to respond */
};
struct thermal_zone_device_ops {
@@ -84,12 +85,9 @@ struct thermal_cooling_device_ops {
int (*get_max_state) (struct thermal_cooling_device *, unsigned long *);
int (*get_cur_state) (struct thermal_cooling_device *, unsigned long *);
int (*set_cur_state) (struct thermal_cooling_device *, unsigned long);
- int (*get_requested_power)(struct thermal_cooling_device *,
- struct thermal_zone_device *, u32 *);
- int (*state2power)(struct thermal_cooling_device *,
- struct thermal_zone_device *, unsigned long, u32 *);
- int (*power2state)(struct thermal_cooling_device *,
- struct thermal_zone_device *, u32, unsigned long *);
+ int (*get_requested_power)(struct thermal_cooling_device *, u32 *);
+ int (*state2power)(struct thermal_cooling_device *, unsigned long, u32 *);
+ int (*power2state)(struct thermal_cooling_device *, u32, unsigned long *);
};
struct thermal_cooling_device {
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 608fa4aadf0e..ad03df1cc266 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -198,7 +198,7 @@ static inline int cpu_to_mem(int cpu)
#define topology_die_cpumask(cpu) cpumask_of(cpu)
#endif
-#ifdef CONFIG_SCHED_SMT
+#if defined(CONFIG_SCHED_SMT) && !defined(cpu_smt_mask)
static inline const struct cpumask *cpu_smt_mask(int cpu)
{
return topology_sibling_cpumask(cpu);
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 1ae36bc8db35..1b8c9d6162bc 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -2,7 +2,9 @@
#ifndef __LINUX_UACCESS_H__
#define __LINUX_UACCESS_H__
+#include <linux/fault-inject-usercopy.h>
#include <linux/instrumented.h>
+#include <linux/minmax.h>
#include <linux/sched.h>
#include <linux/thread_info.h>
@@ -83,6 +85,8 @@ static __always_inline __must_check unsigned long
__copy_from_user(void *to, const void __user *from, unsigned long n)
{
might_fault();
+ if (should_fail_usercopy())
+ return n;
instrument_copy_from_user(to, from, n);
check_object_size(to, n, false);
return raw_copy_from_user(to, from, n);
@@ -104,6 +108,8 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
static __always_inline __must_check unsigned long
__copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
{
+ if (should_fail_usercopy())
+ return n;
instrument_copy_to_user(to, from, n);
check_object_size(from, n, true);
return raw_copy_to_user(to, from, n);
@@ -113,6 +119,8 @@ static __always_inline __must_check unsigned long
__copy_to_user(void __user *to, const void *from, unsigned long n)
{
might_fault();
+ if (should_fail_usercopy())
+ return n;
instrument_copy_to_user(to, from, n);
check_object_size(from, n, true);
return raw_copy_to_user(to, from, n);
@@ -124,7 +132,7 @@ _copy_from_user(void *to, const void __user *from, unsigned long n)
{
unsigned long res = n;
might_fault();
- if (likely(access_ok(from, n))) {
+ if (!should_fail_usercopy() && likely(access_ok(from, n))) {
instrument_copy_from_user(to, from, n);
res = raw_copy_from_user(to, from, n);
}
@@ -142,6 +150,8 @@ static inline __must_check unsigned long
_copy_to_user(void __user *to, const void *from, unsigned long n)
{
might_fault();
+ if (should_fail_usercopy())
+ return n;
if (access_ok(to, n)) {
instrument_copy_to_user(to, from, n);
n = raw_copy_to_user(to, from, n);
diff --git a/include/linux/unicode.h b/include/linux/unicode.h
index 990aa97d8049..74484d44c755 100644
--- a/include/linux/unicode.h
+++ b/include/linux/unicode.h
@@ -27,6 +27,9 @@ int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
unsigned char *dest, size_t dlen);
+int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
+ struct qstr *str);
+
struct unicode_map *utf8_load(const char *version);
void utf8_unload(struct unicode_map *um);
diff --git a/include/linux/usb/typec_altmode.h b/include/linux/usb/typec_altmode.h
index a4b65eaa0f62..5e0a7b7647c3 100644
--- a/include/linux/usb/typec_altmode.h
+++ b/include/linux/usb/typec_altmode.h
@@ -152,10 +152,26 @@ struct typec_altmode_driver {
#define to_altmode_driver(d) container_of(d, struct typec_altmode_driver, \
driver)
+/**
+ * typec_altmode_register_driver - registers a USB Type-C alternate mode
+ * device driver
+ * @drv: pointer to struct typec_altmode_driver
+ *
+ * These drivers will be bind to the partner alternate mode devices. They will
+ * handle all SVID specific communication.
+ */
#define typec_altmode_register_driver(drv) \
__typec_altmode_register_driver(drv, THIS_MODULE)
int __typec_altmode_register_driver(struct typec_altmode_driver *drv,
struct module *module);
+/**
+ * typec_altmode_unregister_driver - unregisters a USB Type-C alternate mode
+ * device driver
+ * @drv: pointer to struct typec_altmode_driver
+ *
+ * These drivers will be bind to the partner alternate mode devices. They will
+ * handle all SVID specific communication.
+ */
void typec_altmode_unregister_driver(struct typec_altmode_driver *drv);
#define module_typec_altmode_driver(__typec_altmode_driver) \
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 0221f852a7e1..938eaf9517e2 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -24,6 +24,7 @@ struct notifier_block; /* in notifier.h */
#define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */
#define VM_NO_GUARD 0x00000040 /* don't add guard page */
#define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */
+#define VM_MAP_PUT_PAGES 0x00000100 /* put pages and free array in vfree */
/*
* VM_KASAN is used slighly differently depending on CONFIG_KASAN_VMALLOC.
@@ -121,6 +122,7 @@ extern void vfree_atomic(const void *addr);
extern void *vmap(struct page **pages, unsigned int count,
unsigned long flags, pgprot_t prot);
+void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot);
extern void vunmap(const void *addr);
extern int remap_vmalloc_range_partial(struct vm_area_struct *vma,
@@ -167,6 +169,7 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size,
unsigned long flags,
unsigned long start, unsigned long end,
const void *caller);
+void free_vm_area(struct vm_struct *area);
extern struct vm_struct *remove_vm_area(const void *addr);
extern struct vm_struct *find_vm_area(const void *addr);
@@ -202,10 +205,6 @@ static inline void set_vm_flush_reset_perms(void *addr)
}
#endif
-/* Allocate/destroy a 'vmalloc' VM area. */
-extern struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes);
-extern void free_vm_area(struct vm_struct *area);
-
/* for /dev/kmem */
extern long vread(char *buf, char *addr, unsigned long count);
extern long vwrite(char *buf, char *addr, unsigned long count);
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 7557c1070fd7..322dcbfcc933 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -28,7 +28,7 @@ struct reclaim_stat {
unsigned nr_writeback;
unsigned nr_immediate;
unsigned nr_pageout;
- unsigned nr_activate[2];
+ unsigned nr_activate[ANON_AND_FILE];
unsigned nr_ref_keep;
unsigned nr_unmap_fail;
unsigned nr_lazyfree_fail;
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index b4d70e7568b2..92c0160b3352 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -1286,6 +1286,8 @@ static inline bool xa_is_advanced(const void *entry)
*/
typedef void (*xa_update_node_t)(struct xa_node *node);
+void xa_delete_node(struct xa_node *, xa_update_node_t);
+
/*
* The xa_state is opaque to its users. It contains various different pieces
* of state involved in the current operation on the XArray. It should be
@@ -1505,6 +1507,28 @@ void xas_pause(struct xa_state *);
void xas_create_range(struct xa_state *);
+#ifdef CONFIG_XARRAY_MULTI
+int xa_get_order(struct xarray *, unsigned long index);
+void xas_split(struct xa_state *, void *entry, unsigned int order);
+void xas_split_alloc(struct xa_state *, void *entry, unsigned int order, gfp_t);
+#else
+static inline int xa_get_order(struct xarray *xa, unsigned long index)
+{
+ return 0;
+}
+
+static inline void xas_split(struct xa_state *xas, void *entry,
+ unsigned int order)
+{
+ xas_store(xas, entry);
+}
+
+static inline void xas_split_alloc(struct xa_state *xas, void *entry,
+ unsigned int order, gfp_t gfp)
+{
+}
+#endif
+
/**
* xas_reload() - Refetch an entry from the xarray.
* @xas: XArray operation state.
@@ -1522,10 +1546,21 @@ void xas_create_range(struct xa_state *);
static inline void *xas_reload(struct xa_state *xas)
{
struct xa_node *node = xas->xa_node;
-
- if (node)
- return xa_entry(xas->xa, node, xas->xa_offset);
- return xa_head(xas->xa);
+ void *entry;
+ char offset;
+
+ if (!node)
+ return xa_head(xas->xa);
+ if (IS_ENABLED(CONFIG_XARRAY_MULTI)) {
+ offset = (xas->xa_index >> node->shift) & XA_CHUNK_MASK;
+ entry = xa_entry(xas->xa, node, offset);
+ if (!xa_is_sibling(entry))
+ return entry;
+ offset = xa_to_sibling(entry);
+ } else {
+ offset = xas->xa_offset;
+ }
+ return xa_entry(xas->xa, node, offset);
}
/**
@@ -1714,13 +1749,12 @@ enum {
* @xas: XArray operation state.
* @entry: Entry retrieved from the array.
*
- * The loop body will be executed for each entry in the XArray that lies
- * within the range specified by @xas. If the loop completes successfully,
- * any entries that lie in this range will be replaced by @entry. The caller
- * may break out of the loop; if they do so, the contents of the XArray will
- * be unchanged. The operation may fail due to an out of memory condition.
- * The caller may also call xa_set_err() to exit the loop while setting an
- * error to record the reason.
+ * The loop body will be executed for each entry in the XArray that
+ * lies within the range specified by @xas. If the loop terminates
+ * normally, @entry will be %NULL. The user may break out of the loop,
+ * which will leave @entry set to the conflicting entry. The caller
+ * may also call xa_set_err() to exit the loop while setting an error
+ * to record the reason.
*/
#define xas_for_each_conflict(xas, entry) \
while ((entry = xas_find_conflict(xas)))
diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h
index 357ef1aadbc0..e013736e275d 100644
--- a/include/misc/ocxl.h
+++ b/include/misc/ocxl.h
@@ -460,14 +460,8 @@ int ocxl_link_remove_pe(void *link_handle, int pasid);
* Allocate an AFU interrupt associated to the link.
*
* 'hw_irq' is the hardware interrupt number
- * 'obj_handle' is the 64-bit object handle to be passed to the AFU to
- * trigger the interrupt.
- * On P9, 'obj_handle' is an address, which, if written, triggers the
- * interrupt. It is an MMIO address which needs to be remapped (one
- * page).
- */
-int ocxl_link_irq_alloc(void *link_handle, int *hw_irq,
- u64 *obj_handle);
+ */
+int ocxl_link_irq_alloc(void *link_handle, int *hw_irq);
/*
* Free a previously allocated AFU interrupt
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index aee47f2b5709..661edfc8722e 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -474,6 +474,7 @@ struct ieee80211_sta_s1g_cap {
* @n_bitrates: Number of bitrates in @bitrates
* @ht_cap: HT capabilities in this band
* @vht_cap: VHT capabilities in this band
+ * @s1g_cap: S1G capabilities in this band
* @edmg_cap: EDMG capabilities in this band
* @s1g_cap: S1G capabilities in this band (S1B band only, of course)
* @n_iftype_data: number of iftype data entries
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 36c5c5e38c1d..0bdbc0d17d2f 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -361,6 +361,7 @@ TRACE_EVENT(aer_event,
EM ( MF_MSG_POISONED_HUGE, "huge page already hardware poisoned" ) \
EM ( MF_MSG_HUGE, "huge page" ) \
EM ( MF_MSG_FREE_HUGE, "free huge page" ) \
+ EM ( MF_MSG_NON_PMD_HUGE, "non-pmd-sized huge page" ) \
EM ( MF_MSG_UNMAP_FAILED, "unmapping failed page" ) \
EM ( MF_MSG_DIRTY_SWAPCACHE, "dirty swapcache page" ) \
EM ( MF_MSG_CLEAN_SWAPCACHE, "clean swapcache page" ) \
@@ -373,6 +374,8 @@ TRACE_EVENT(aer_event,
EM ( MF_MSG_TRUNCATED_LRU, "already truncated LRU page" ) \
EM ( MF_MSG_BUDDY, "free buddy page" ) \
EM ( MF_MSG_BUDDY_2ND, "free buddy page (2nd try)" ) \
+ EM ( MF_MSG_DAX, "dax page" ) \
+ EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" ) \
EMe ( MF_MSG_UNKNOWN, "unknown page" )
/*
diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
index 66a8f369a2fa..bae29f50adff 100644
--- a/include/rdma/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -110,5 +110,8 @@ const struct ib_gid_attr *rdma_get_gid_attr(struct ib_device *device,
u8 port_num, int index);
void rdma_put_gid_attr(const struct ib_gid_attr *attr);
void rdma_hold_gid_attr(const struct ib_gid_attr *attr);
+ssize_t rdma_query_gid_table(struct ib_device *device,
+ struct ib_uverbs_gid_entry *entries,
+ size_t max_entries);
#endif /* _IB_CACHE_H */
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index 382427add677..e23eb357b761 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -14,9 +14,6 @@
#include <rdma/ib_sa.h>
#include <rdma/rdma_cm.h>
-/* ib_cm and ib_user_cm modules share /sys/class/infiniband_cm */
-extern struct class cm_class;
-
enum ib_cm_state {
IB_CM_IDLE,
IB_CM_LISTEN,
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 71f573a418bf..70597508c765 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -17,6 +17,7 @@ struct ib_umem_odp;
struct ib_umem {
struct ib_device *ibdev;
struct mm_struct *owning_mm;
+ u64 iova;
size_t length;
unsigned long address;
u32 writable : 1;
@@ -33,19 +34,46 @@ static inline int ib_umem_offset(struct ib_umem *umem)
return umem->address & ~PAGE_MASK;
}
+static inline size_t ib_umem_num_dma_blocks(struct ib_umem *umem,
+ unsigned long pgsz)
+{
+ return (size_t)((ALIGN(umem->iova + umem->length, pgsz) -
+ ALIGN_DOWN(umem->iova, pgsz))) /
+ pgsz;
+}
+
static inline size_t ib_umem_num_pages(struct ib_umem *umem)
{
- return (ALIGN(umem->address + umem->length, PAGE_SIZE) -
- ALIGN_DOWN(umem->address, PAGE_SIZE)) >>
- PAGE_SHIFT;
+ return ib_umem_num_dma_blocks(umem, PAGE_SIZE);
+}
+
+static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
+ struct ib_umem *umem,
+ unsigned long pgsz)
+{
+ __rdma_block_iter_start(biter, umem->sg_head.sgl, umem->nmap, pgsz);
}
+/**
+ * rdma_umem_for_each_dma_block - iterate over contiguous DMA blocks of the umem
+ * @umem: umem to iterate over
+ * @pgsz: Page size to split the list into
+ *
+ * pgsz must be <= PAGE_SIZE or computed by ib_umem_find_best_pgsz(). The
+ * returned DMA blocks will be aligned to pgsz and span the range:
+ * ALIGN_DOWN(umem->address, pgsz) to ALIGN(umem->address + umem->length, pgsz)
+ *
+ * Performs exactly ib_umem_num_dma_blocks() iterations.
+ */
+#define rdma_umem_for_each_dma_block(umem, biter, pgsz) \
+ for (__rdma_umem_block_iter_start(biter, umem, pgsz); \
+ __rdma_block_iter_next(biter);)
+
#ifdef CONFIG_INFINIBAND_USER_MEM
struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
size_t size, int access);
void ib_umem_release(struct ib_umem *umem);
-int ib_umem_page_count(struct ib_umem *umem);
int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
size_t length);
unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
@@ -63,15 +91,15 @@ static inline struct ib_umem *ib_umem_get(struct ib_device *device,
return ERR_PTR(-EINVAL);
}
static inline void ib_umem_release(struct ib_umem *umem) { }
-static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; }
static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
size_t length) {
return -EINVAL;
}
-static inline int ib_umem_find_best_pgsz(struct ib_umem *umem,
- unsigned long pgsz_bitmap,
- unsigned long virt) {
- return -EINVAL;
+static inline unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
+ unsigned long pgsz_bitmap,
+ unsigned long virt)
+{
+ return 0;
}
#endif /* CONFIG_INFINIBAND_USER_MEM */
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index d16d2c17e733..0844c1d05ac6 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -14,17 +14,13 @@ struct ib_umem_odp {
struct mmu_interval_notifier notifier;
struct pid *tgid;
+ /* An array of the pfns included in the on-demand paging umem. */
+ unsigned long *pfn_list;
+
/*
- * An array of the pages included in the on-demand paging umem.
- * Indices of pages that are currently not mapped into the device will
- * contain NULL.
- */
- struct page **page_list;
- /*
- * An array of the same size as page_list, with DMA addresses mapped
- * for pages the pages in page_list. The lower two bits designate
- * access permissions. See ODP_READ_ALLOWED_BIT and
- * ODP_WRITE_ALLOWED_BIT.
+ * An array with DMA addresses mapped for pfns in pfn_list.
+ * The lower two bits designate access permissions.
+ * See ODP_READ_ALLOWED_BIT and ODP_WRITE_ALLOWED_BIT.
*/
dma_addr_t *dma_list;
/*
@@ -97,9 +93,8 @@ ib_umem_odp_alloc_child(struct ib_umem_odp *root_umem, unsigned long addr,
const struct mmu_interval_notifier_ops *ops);
void ib_umem_odp_release(struct ib_umem_odp *umem_odp);
-int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
- u64 bcnt, u64 access_mask,
- unsigned long current_seq);
+int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 start_offset,
+ u64 bcnt, u64 access_mask, bool fault);
void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
u64 bound);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index c0b2fa7e9b95..9bf6c319a670 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -138,10 +138,9 @@ union ib_gid {
extern union ib_gid zgid;
enum ib_gid_type {
- /* If link layer is Ethernet, this is RoCE V1 */
- IB_GID_TYPE_IB = 0,
- IB_GID_TYPE_ROCE = 0,
- IB_GID_TYPE_ROCE_UDP_ENCAP = 1,
+ IB_GID_TYPE_IB = IB_UVERBS_GID_TYPE_IB,
+ IB_GID_TYPE_ROCE = IB_UVERBS_GID_TYPE_ROCE_V1,
+ IB_GID_TYPE_ROCE_UDP_ENCAP = IB_UVERBS_GID_TYPE_ROCE_V2,
IB_GID_TYPE_SIZE
};
@@ -180,7 +179,7 @@ rdma_node_get_transport(unsigned int node_type);
enum rdma_network_type {
RDMA_NETWORK_IB,
- RDMA_NETWORK_ROCE_V1 = RDMA_NETWORK_IB,
+ RDMA_NETWORK_ROCE_V1,
RDMA_NETWORK_IPV4,
RDMA_NETWORK_IPV6
};
@@ -190,9 +189,10 @@ static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type net
if (network_type == RDMA_NETWORK_IPV4 ||
network_type == RDMA_NETWORK_IPV6)
return IB_GID_TYPE_ROCE_UDP_ENCAP;
-
- /* IB_GID_TYPE_IB same as RDMA_NETWORK_ROCE_V1 */
- return IB_GID_TYPE_IB;
+ else if (network_type == RDMA_NETWORK_ROCE_V1)
+ return IB_GID_TYPE_ROCE;
+ else
+ return IB_GID_TYPE_IB;
}
static inline enum rdma_network_type
@@ -201,6 +201,9 @@ rdma_gid_attr_network_type(const struct ib_gid_attr *attr)
if (attr->gid_type == IB_GID_TYPE_IB)
return RDMA_NETWORK_IB;
+ if (attr->gid_type == IB_GID_TYPE_ROCE)
+ return RDMA_NETWORK_ROCE_V1;
+
if (ipv6_addr_v4mapped((struct in6_addr *)&attr->gid))
return RDMA_NETWORK_IPV4;
else
@@ -535,7 +538,8 @@ enum ib_port_speed {
IB_SPEED_FDR10 = 8,
IB_SPEED_FDR = 16,
IB_SPEED_EDR = 32,
- IB_SPEED_HDR = 64
+ IB_SPEED_HDR = 64,
+ IB_SPEED_NDR = 128,
};
/**
@@ -669,7 +673,7 @@ struct ib_port_attr {
u8 subnet_timeout;
u8 init_type_reply;
u8 active_width;
- u8 active_speed;
+ u16 active_speed;
u8 phys_state;
u16 port_cap_flags2;
};
@@ -952,13 +956,14 @@ enum ib_wc_status {
const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status);
enum ib_wc_opcode {
- IB_WC_SEND,
- IB_WC_RDMA_WRITE,
- IB_WC_RDMA_READ,
- IB_WC_COMP_SWAP,
- IB_WC_FETCH_ADD,
- IB_WC_LSO,
- IB_WC_LOCAL_INV,
+ IB_WC_SEND = IB_UVERBS_WC_SEND,
+ IB_WC_RDMA_WRITE = IB_UVERBS_WC_RDMA_WRITE,
+ IB_WC_RDMA_READ = IB_UVERBS_WC_RDMA_READ,
+ IB_WC_COMP_SWAP = IB_UVERBS_WC_COMP_SWAP,
+ IB_WC_FETCH_ADD = IB_UVERBS_WC_FETCH_ADD,
+ IB_WC_BIND_MW = IB_UVERBS_WC_BIND_MW,
+ IB_WC_LOCAL_INV = IB_UVERBS_WC_LOCAL_INV,
+ IB_WC_LSO = IB_UVERBS_WC_TSO,
IB_WC_REG_MR,
IB_WC_MASKED_COMP_SWAP,
IB_WC_MASKED_FETCH_ADD,
@@ -1291,6 +1296,7 @@ enum ib_wr_opcode {
IB_WR_RDMA_READ = IB_UVERBS_WR_RDMA_READ,
IB_WR_ATOMIC_CMP_AND_SWP = IB_UVERBS_WR_ATOMIC_CMP_AND_SWP,
IB_WR_ATOMIC_FETCH_AND_ADD = IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD,
+ IB_WR_BIND_MW = IB_UVERBS_WR_BIND_MW,
IB_WR_LSO = IB_UVERBS_WR_TSO,
IB_WR_SEND_WITH_INV = IB_UVERBS_WR_SEND_WITH_INV,
IB_WR_RDMA_READ_WITH_INV = IB_UVERBS_WR_RDMA_READ_WITH_INV,
@@ -1463,11 +1469,6 @@ enum rdma_remove_reason {
RDMA_REMOVE_DRIVER_REMOVE,
/* uobj is being cleaned-up before being committed */
RDMA_REMOVE_ABORT,
- /*
- * uobj has been fully created, with the uobj->object set, but is being
- * cleaned up before being comitted
- */
- RDMA_REMOVE_ABORT_HWOBJ,
};
struct ib_rdmacg_object {
@@ -1479,12 +1480,6 @@ struct ib_rdmacg_object {
struct ib_ucontext {
struct ib_device *device;
struct ib_uverbs_file *ufile;
- /*
- * 'closing' can be read by the driver only during a destroy callback,
- * it is set when we are closing the file descriptor and indicates
- * that mm_sem may be locked.
- */
- bool closing;
bool cleanup_retryable;
@@ -1863,17 +1858,6 @@ enum ib_flow_spec_type {
#define IB_FLOW_SPEC_LAYER_MASK 0xF0
#define IB_FLOW_SPEC_SUPPORT_LAYERS 10
-/* Flow steering rule priority is set according to it's domain.
- * Lower domain value means higher priority.
- */
-enum ib_flow_domain {
- IB_FLOW_DOMAIN_USER,
- IB_FLOW_DOMAIN_ETHTOOL,
- IB_FLOW_DOMAIN_RFS,
- IB_FLOW_DOMAIN_NIC,
- IB_FLOW_DOMAIN_NUM /* Must be last */
-};
-
enum ib_flow_flags {
IB_FLOW_ATTR_FLAGS_DONT_TRAP = 1UL << 1, /* Continue match, no steal */
IB_FLOW_ATTR_FLAGS_EGRESS = 1UL << 2, /* Egress flow */
@@ -2414,12 +2398,12 @@ struct ib_device_ops {
void (*mmap_free)(struct rdma_user_mmap_entry *entry);
void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
- void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
+ int (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
int (*create_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr,
struct ib_udata *udata);
int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
- void (*destroy_ah)(struct ib_ah *ah, u32 flags);
+ int (*destroy_ah)(struct ib_ah *ah, u32 flags);
int (*create_srq)(struct ib_srq *srq,
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata);
@@ -2427,7 +2411,7 @@ struct ib_device_ops {
enum ib_srq_attr_mask srq_attr_mask,
struct ib_udata *udata);
int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
- void (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata);
+ int (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata);
struct ib_qp *(*create_qp)(struct ib_pd *pd,
struct ib_qp_init_attr *qp_init_attr,
struct ib_udata *udata);
@@ -2439,7 +2423,7 @@ struct ib_device_ops {
int (*create_cq)(struct ib_cq *cq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
- void (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
+ int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata);
struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags);
struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
@@ -2462,16 +2446,15 @@ struct ib_device_ops {
unsigned int *sg_offset);
int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status);
- struct ib_mw *(*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata);
+ int (*alloc_mw)(struct ib_mw *mw, struct ib_udata *udata);
int (*dealloc_mw)(struct ib_mw *mw);
int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
int (*alloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
- void (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
+ int (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
struct ib_flow *(*create_flow)(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
- int domain, struct ib_udata *udata);
+ struct ib_udata *udata);
int (*destroy_flow)(struct ib_flow *flow_id);
struct ib_flow_action *(*create_flow_action_esp)(
struct ib_device *device,
@@ -2496,13 +2479,12 @@ struct ib_device_ops {
struct ib_wq *(*create_wq)(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata);
- void (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata);
+ int (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata);
int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr,
u32 wq_attr_mask, struct ib_udata *udata);
- struct ib_rwq_ind_table *(*create_rwq_ind_table)(
- struct ib_device *device,
- struct ib_rwq_ind_table_init_attr *init_attr,
- struct ib_udata *udata);
+ int (*create_rwq_ind_table)(struct ib_rwq_ind_table *ib_rwq_ind_table,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata);
int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
struct ib_dm *(*alloc_dm)(struct ib_device *device,
struct ib_ucontext *context,
@@ -2514,7 +2496,7 @@ struct ib_device_ops {
struct uverbs_attr_bundle *attrs);
int (*create_counters)(struct ib_counters *counters,
struct uverbs_attr_bundle *attrs);
- void (*destroy_counters)(struct ib_counters *counters);
+ int (*destroy_counters)(struct ib_counters *counters);
int (*read_counters)(struct ib_counters *counters,
struct ib_counters_read_attr *counters_read_attr,
struct uverbs_attr_bundle *attrs);
@@ -2624,7 +2606,9 @@ struct ib_device_ops {
DECLARE_RDMA_OBJ_SIZE(ib_ah);
DECLARE_RDMA_OBJ_SIZE(ib_counters);
DECLARE_RDMA_OBJ_SIZE(ib_cq);
+ DECLARE_RDMA_OBJ_SIZE(ib_mw);
DECLARE_RDMA_OBJ_SIZE(ib_pd);
+ DECLARE_RDMA_OBJ_SIZE(ib_rwq_ind_table);
DECLARE_RDMA_OBJ_SIZE(ib_srq);
DECLARE_RDMA_OBJ_SIZE(ib_ucontext);
DECLARE_RDMA_OBJ_SIZE(ib_xrcd);
@@ -2798,7 +2782,8 @@ void ib_dealloc_device(struct ib_device *device);
void ib_get_device_fw_str(struct ib_device *device, char *str);
-int ib_register_device(struct ib_device *device, const char *name);
+int ib_register_device(struct ib_device *device, const char *name,
+ struct device *dma_device);
void ib_unregister_device(struct ib_device *device);
void ib_unregister_driver(enum rdma_driver_id driver_id);
void ib_unregister_device_and_put(struct ib_device *device);
@@ -3352,30 +3337,6 @@ static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num)
}
/**
- * rdma_find_pg_bit - Find page bit given address and HW supported page sizes
- *
- * @addr: address
- * @pgsz_bitmap: bitmap of HW supported page sizes
- */
-static inline unsigned int rdma_find_pg_bit(unsigned long addr,
- unsigned long pgsz_bitmap)
-{
- unsigned long align;
- unsigned long pgsz;
-
- align = addr & -addr;
-
- /* Find page bit such that addr is aligned to the highest supported
- * HW page size
- */
- pgsz = pgsz_bitmap & ~(-align << 1);
- if (!pgsz)
- return __ffs(pgsz_bitmap);
-
- return __fls(pgsz);
-}
-
-/**
* rdma_core_cap_opa_port - Return whether the RDMA Port is OPA or not.
* @device: Device
* @port_num: 1 based Port number
@@ -3472,12 +3433,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
#define ib_alloc_pd(device, flags) \
__ib_alloc_pd((device), (flags), KBUILD_MODNAME)
-/**
- * ib_dealloc_pd_user - Deallocate kernel/user PD
- * @pd: The protection domain
- * @udata: Valid user data or NULL for kernel objects
- */
-void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata);
+int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata);
/**
* ib_dealloc_pd - Deallocate kernel PD
@@ -3487,7 +3443,9 @@ void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata);
*/
static inline void ib_dealloc_pd(struct ib_pd *pd)
{
- ib_dealloc_pd_user(pd, NULL);
+ int ret = ib_dealloc_pd_user(pd, NULL);
+
+ WARN_ONCE(ret, "Destroy of kernel PD shouldn't fail");
}
enum rdma_create_ah_flags {
@@ -3615,9 +3573,11 @@ int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata);
*
* NOTE: for user ah use rdma_destroy_ah_user with valid udata!
*/
-static inline int rdma_destroy_ah(struct ib_ah *ah, u32 flags)
+static inline void rdma_destroy_ah(struct ib_ah *ah, u32 flags)
{
- return rdma_destroy_ah_user(ah, flags, NULL);
+ int ret = rdma_destroy_ah_user(ah, flags, NULL);
+
+ WARN_ONCE(ret, "Destroy of kernel AH shouldn't fail");
}
struct ib_srq *ib_create_srq_user(struct ib_pd *pd,
@@ -3671,9 +3631,11 @@ int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata);
*
* NOTE: for user srq use ib_destroy_srq_user with valid udata!
*/
-static inline int ib_destroy_srq(struct ib_srq *srq)
+static inline void ib_destroy_srq(struct ib_srq *srq)
{
- return ib_destroy_srq_user(srq, NULL);
+ int ret = ib_destroy_srq_user(srq, NULL);
+
+ WARN_ONCE(ret, "Destroy of kernel SRQ shouldn't fail");
}
/**
@@ -3817,46 +3779,15 @@ static inline int ib_post_recv(struct ib_qp *qp,
return qp->device->ops.post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
}
-struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
- int nr_cqe, int comp_vector,
- enum ib_poll_context poll_ctx,
- const char *caller, struct ib_udata *udata);
-
-/**
- * ib_alloc_cq_user: Allocate kernel/user CQ
- * @dev: The IB device
- * @private: Private data attached to the CQE
- * @nr_cqe: Number of CQEs in the CQ
- * @comp_vector: Completion vector used for the IRQs
- * @poll_ctx: Context used for polling the CQ
- * @udata: Valid user data or NULL for kernel objects
- */
-static inline struct ib_cq *ib_alloc_cq_user(struct ib_device *dev,
- void *private, int nr_cqe,
- int comp_vector,
- enum ib_poll_context poll_ctx,
- struct ib_udata *udata)
-{
- return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
- KBUILD_MODNAME, udata);
-}
-
-/**
- * ib_alloc_cq: Allocate kernel CQ
- * @dev: The IB device
- * @private: Private data attached to the CQE
- * @nr_cqe: Number of CQEs in the CQ
- * @comp_vector: Completion vector used for the IRQs
- * @poll_ctx: Context used for polling the CQ
- *
- * NOTE: for user cq use ib_alloc_cq_user with valid udata!
- */
+struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe,
+ int comp_vector, enum ib_poll_context poll_ctx,
+ const char *caller);
static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
int nr_cqe, int comp_vector,
enum ib_poll_context poll_ctx)
{
- return ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
- NULL);
+ return __ib_alloc_cq(dev, private, nr_cqe, comp_vector, poll_ctx,
+ KBUILD_MODNAME);
}
struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
@@ -3878,26 +3809,7 @@ static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev,
KBUILD_MODNAME);
}
-/**
- * ib_free_cq_user - Free kernel/user CQ
- * @cq: The CQ to free
- * @udata: Valid user data or NULL for kernel objects
- *
- * NOTE: This function shouldn't be called on shared CQs.
- */
-void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata);
-
-/**
- * ib_free_cq - Free kernel CQ
- * @cq: The CQ to free
- *
- * NOTE: for user cq use ib_free_cq_user with valid udata!
- */
-static inline void ib_free_cq(struct ib_cq *cq)
-{
- ib_free_cq_user(cq, NULL);
-}
-
+void ib_free_cq(struct ib_cq *cq);
int ib_process_cq_direct(struct ib_cq *cq, int budget);
/**
@@ -3955,7 +3867,9 @@ int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata);
*/
static inline void ib_destroy_cq(struct ib_cq *cq)
{
- ib_destroy_cq_user(cq, NULL);
+ int ret = ib_destroy_cq_user(cq, NULL);
+
+ WARN_ONCE(ret, "Destroy of kernel CQ shouldn't fail");
}
/**
@@ -4379,10 +4293,9 @@ struct net_device *ib_device_netdev(struct ib_device *dev, u8 port);
struct ib_wq *ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr);
-int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
+int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata);
int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr,
u32 wq_attr_mask);
-int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset, unsigned int page_size);
@@ -4410,7 +4323,7 @@ void ib_drain_rq(struct ib_qp *qp);
void ib_drain_sq(struct ib_qp *qp);
void ib_drain_qp(struct ib_qp *qp);
-int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width);
+int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u16 *speed, u8 *width);
static inline u8 *rdma_ah_retrieve_dmac(struct rdma_ah_attr *attr)
{
@@ -4717,6 +4630,7 @@ bool rdma_dev_access_netns(const struct ib_device *device,
const struct net *net);
#define IB_ROCE_UDP_ENCAP_VALID_PORT_MIN (0xC000)
+#define IB_ROCE_UDP_ENCAP_VALID_PORT_MAX (0xFFFF)
#define IB_GRH_FLOWLABEL_MASK (0x000FFFFF)
/**
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index cf5da2ae49bf..c672ae1da26b 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -110,11 +110,14 @@ struct rdma_cm_id {
u8 port_num;
};
-struct rdma_cm_id *__rdma_create_id(struct net *net,
- rdma_cm_event_handler event_handler,
- void *context, enum rdma_ucm_port_space ps,
- enum ib_qp_type qp_type,
- const char *caller);
+struct rdma_cm_id *
+__rdma_create_kernel_id(struct net *net, rdma_cm_event_handler event_handler,
+ void *context, enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type, const char *caller);
+struct rdma_cm_id *rdma_create_user_id(rdma_cm_event_handler event_handler,
+ void *context,
+ enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type);
/**
* rdma_create_id - Create an RDMA identifier.
@@ -132,9 +135,9 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
* The event handler callback serializes on the id's mutex and is
* allowed to sleep.
*/
-#define rdma_create_id(net, event_handler, context, ps, qp_type) \
- __rdma_create_id((net), (event_handler), (context), (ps), (qp_type), \
- KBUILD_MODNAME)
+#define rdma_create_id(net, event_handler, context, ps, qp_type) \
+ __rdma_create_kernel_id(net, event_handler, context, ps, qp_type, \
+ KBUILD_MODNAME)
/**
* rdma_destroy_id - Destroys an RDMA identifier.
@@ -250,29 +253,12 @@ int rdma_connect_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
*/
int rdma_listen(struct rdma_cm_id *id, int backlog);
-int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
- const char *caller);
+int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
-int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
- const char *caller, struct rdma_ucm_ece *ece);
-
-/**
- * rdma_accept - Called to accept a connection request or response.
- * @id: Connection identifier associated with the request.
- * @conn_param: Information needed to establish the connection. This must be
- * provided if accepting a connection request. If accepting a connection
- * response, this parameter must be NULL.
- *
- * Typically, this routine is only called by the listener to accept a connection
- * request. It must also be called on the active side of a connection if the
- * user is performing their own QP transitions.
- *
- * In the case of error, a reject message is sent to the remote side and the
- * state of the qp associated with the id is modified to error, such that any
- * previously posted receive buffers would be flushed.
- */
-#define rdma_accept(id, conn_param) \
- __rdma_accept((id), (conn_param), KBUILD_MODNAME)
+void rdma_lock_handler(struct rdma_cm_id *id);
+void rdma_unlock_handler(struct rdma_cm_id *id);
+int rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+ struct rdma_ucm_ece *ece);
/**
* rdma_notify - Notifies the RDMA CM of an asynchronous event that has
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index 7682d1bcf789..d3a1cc5be7bc 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -106,22 +106,11 @@ struct rdma_restrack_entry {
int rdma_restrack_count(struct ib_device *dev,
enum rdma_restrack_type type);
-
-void rdma_restrack_kadd(struct rdma_restrack_entry *res);
-void rdma_restrack_uadd(struct rdma_restrack_entry *res);
-
-/**
- * rdma_restrack_del() - delete object from the reource tracking database
- * @res: resource entry
- * @type: actual type of object to operate
- */
-void rdma_restrack_del(struct rdma_restrack_entry *res);
-
/**
* rdma_is_kernel_res() - check the owner of resource
* @res: resource entry
*/
-static inline bool rdma_is_kernel_res(struct rdma_restrack_entry *res)
+static inline bool rdma_is_kernel_res(const struct rdma_restrack_entry *res)
{
return !res->user;
}
@@ -138,14 +127,6 @@ int __must_check rdma_restrack_get(struct rdma_restrack_entry *res);
*/
int rdma_restrack_put(struct rdma_restrack_entry *res);
-/**
- * rdma_restrack_set_task() - set the task for this resource
- * @res: resource entry
- * @caller: kernel name, the current task will be used if the caller is NULL.
- */
-void rdma_restrack_set_task(struct rdma_restrack_entry *res,
- const char *caller);
-
/*
* Helper functions for rdma drivers when filling out
* nldev driver attributes.
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 5f0c1cf1ea13..8eb49231c6bb 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -40,6 +40,7 @@ enum afs_server_trace {
afs_server_trace_get_new_cbi,
afs_server_trace_get_probe,
afs_server_trace_give_up_cb,
+ afs_server_trace_purging,
afs_server_trace_put_call,
afs_server_trace_put_cbi,
afs_server_trace_put_find_rsq,
@@ -50,6 +51,7 @@ enum afs_server_trace {
afs_server_trace_update,
};
+
enum afs_volume_trace {
afs_volume_trace_alloc,
afs_volume_trace_free,
@@ -67,6 +69,46 @@ enum afs_volume_trace {
afs_volume_trace_remove,
};
+enum afs_cell_trace {
+ afs_cell_trace_alloc,
+ afs_cell_trace_free,
+ afs_cell_trace_get_queue_dns,
+ afs_cell_trace_get_queue_manage,
+ afs_cell_trace_get_queue_new,
+ afs_cell_trace_get_vol,
+ afs_cell_trace_insert,
+ afs_cell_trace_manage,
+ afs_cell_trace_put_candidate,
+ afs_cell_trace_put_destroy,
+ afs_cell_trace_put_queue_fail,
+ afs_cell_trace_put_queue_work,
+ afs_cell_trace_put_vol,
+ afs_cell_trace_see_source,
+ afs_cell_trace_see_ws,
+ afs_cell_trace_unuse_alias,
+ afs_cell_trace_unuse_check_alias,
+ afs_cell_trace_unuse_delete,
+ afs_cell_trace_unuse_fc,
+ afs_cell_trace_unuse_lookup,
+ afs_cell_trace_unuse_mntpt,
+ afs_cell_trace_unuse_no_pin,
+ afs_cell_trace_unuse_parse,
+ afs_cell_trace_unuse_pin,
+ afs_cell_trace_unuse_probe,
+ afs_cell_trace_unuse_sbi,
+ afs_cell_trace_unuse_ws,
+ afs_cell_trace_use_alias,
+ afs_cell_trace_use_check_alias,
+ afs_cell_trace_use_fc,
+ afs_cell_trace_use_fc_alias,
+ afs_cell_trace_use_lookup,
+ afs_cell_trace_use_mntpt,
+ afs_cell_trace_use_pin,
+ afs_cell_trace_use_probe,
+ afs_cell_trace_use_sbi,
+ afs_cell_trace_wait,
+};
+
enum afs_fs_operation {
afs_FS_FetchData = 130, /* AFS Fetch file data */
afs_FS_FetchACL = 131, /* AFS Fetch file ACL */
@@ -270,6 +312,7 @@ enum afs_cb_break_reason {
EM(afs_server_trace_get_new_cbi, "GET cbi ") \
EM(afs_server_trace_get_probe, "GET probe") \
EM(afs_server_trace_give_up_cb, "giveup-cb") \
+ EM(afs_server_trace_purging, "PURGE ") \
EM(afs_server_trace_put_call, "PUT call ") \
EM(afs_server_trace_put_cbi, "PUT cbi ") \
EM(afs_server_trace_put_find_rsq, "PUT f-rsq") \
@@ -295,6 +338,44 @@ enum afs_cb_break_reason {
EM(afs_volume_trace_put_validate_fc, "PUT fc-validat") \
E_(afs_volume_trace_remove, "REMOVE ")
+#define afs_cell_traces \
+ EM(afs_cell_trace_alloc, "ALLOC ") \
+ EM(afs_cell_trace_free, "FREE ") \
+ EM(afs_cell_trace_get_queue_dns, "GET q-dns ") \
+ EM(afs_cell_trace_get_queue_manage, "GET q-mng ") \
+ EM(afs_cell_trace_get_queue_new, "GET q-new ") \
+ EM(afs_cell_trace_get_vol, "GET vol ") \
+ EM(afs_cell_trace_insert, "INSERT ") \
+ EM(afs_cell_trace_manage, "MANAGE ") \
+ EM(afs_cell_trace_put_candidate, "PUT candid") \
+ EM(afs_cell_trace_put_destroy, "PUT destry") \
+ EM(afs_cell_trace_put_queue_work, "PUT q-work") \
+ EM(afs_cell_trace_put_queue_fail, "PUT q-fail") \
+ EM(afs_cell_trace_put_vol, "PUT vol ") \
+ EM(afs_cell_trace_see_source, "SEE source") \
+ EM(afs_cell_trace_see_ws, "SEE ws ") \
+ EM(afs_cell_trace_unuse_alias, "UNU alias ") \
+ EM(afs_cell_trace_unuse_check_alias, "UNU chk-al") \
+ EM(afs_cell_trace_unuse_delete, "UNU delete") \
+ EM(afs_cell_trace_unuse_fc, "UNU fc ") \
+ EM(afs_cell_trace_unuse_lookup, "UNU lookup") \
+ EM(afs_cell_trace_unuse_mntpt, "UNU mntpt ") \
+ EM(afs_cell_trace_unuse_parse, "UNU parse ") \
+ EM(afs_cell_trace_unuse_pin, "UNU pin ") \
+ EM(afs_cell_trace_unuse_probe, "UNU probe ") \
+ EM(afs_cell_trace_unuse_sbi, "UNU sbi ") \
+ EM(afs_cell_trace_unuse_ws, "UNU ws ") \
+ EM(afs_cell_trace_use_alias, "USE alias ") \
+ EM(afs_cell_trace_use_check_alias, "USE chk-al") \
+ EM(afs_cell_trace_use_fc, "USE fc ") \
+ EM(afs_cell_trace_use_fc_alias, "USE fc-al ") \
+ EM(afs_cell_trace_use_lookup, "USE lookup") \
+ EM(afs_cell_trace_use_mntpt, "USE mntpt ") \
+ EM(afs_cell_trace_use_pin, "USE pin ") \
+ EM(afs_cell_trace_use_probe, "USE probe ") \
+ EM(afs_cell_trace_use_sbi, "USE sbi ") \
+ E_(afs_cell_trace_wait, "WAIT ")
+
#define afs_fs_operations \
EM(afs_FS_FetchData, "FS.FetchData") \
EM(afs_FS_FetchStatus, "FS.FetchStatus") \
@@ -483,6 +564,7 @@ enum afs_cb_break_reason {
afs_call_traces;
afs_server_traces;
+afs_cell_traces;
afs_fs_operations;
afs_vl_operations;
afs_edit_dir_ops;
@@ -1358,6 +1440,33 @@ TRACE_EVENT(afs_volume,
__entry->ref)
);
+TRACE_EVENT(afs_cell,
+ TP_PROTO(unsigned int cell_debug_id, int usage, int active,
+ enum afs_cell_trace reason),
+
+ TP_ARGS(cell_debug_id, usage, active, reason),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, cell )
+ __field(int, usage )
+ __field(int, active )
+ __field(int, reason )
+ ),
+
+ TP_fast_assign(
+ __entry->cell = cell_debug_id;
+ __entry->usage = usage;
+ __entry->active = active;
+ __entry->reason = reason;
+ ),
+
+ TP_printk("L=%08x %s u=%d a=%d",
+ __entry->cell,
+ __print_symbolic(__entry->reason, afs_cell_traces),
+ __entry->usage,
+ __entry->active)
+ );
+
#endif /* _TRACE_AFS_H */
/* This part must be outside protection */
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 8a1c1311acac..f8f1e85ff130 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -111,13 +111,15 @@ TRACE_DEFINE_ENUM(CP_RESIZE);
#define show_alloc_mode(type) \
__print_symbolic(type, \
- { LFS, "LFS-mode" }, \
- { SSR, "SSR-mode" })
+ { LFS, "LFS-mode" }, \
+ { SSR, "SSR-mode" }, \
+ { AT_SSR, "AT_SSR-mode" })
#define show_victim_policy(type) \
__print_symbolic(type, \
{ GC_GREEDY, "Greedy" }, \
- { GC_CB, "Cost-Benefit" })
+ { GC_CB, "Cost-Benefit" }, \
+ { GC_AT, "Age-threshold" })
#define show_cpreason(type) \
__print_flags(type, "|", \
@@ -134,7 +136,7 @@ TRACE_DEFINE_ENUM(CP_RESIZE);
__print_symbolic(type, \
{ CP_NO_NEEDED, "no needed" }, \
{ CP_NON_REGULAR, "non regular" }, \
- { CP_COMPRESSED, "compreesed" }, \
+ { CP_COMPRESSED, "compressed" }, \
{ CP_HARDLINK, "hardlink" }, \
{ CP_SB_NEED_CP, "sb needs cp" }, \
{ CP_WRONG_PINO, "wrong pino" }, \
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index ced71237b7e4..155b5cb43cfd 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -74,17 +74,17 @@ TRACE_EVENT_RCU(rcu_grace_period,
TP_STRUCT__entry(
__field(const char *, rcuname)
- __field(unsigned long, gp_seq)
+ __field(long, gp_seq)
__field(const char *, gpevent)
),
TP_fast_assign(
__entry->rcuname = rcuname;
- __entry->gp_seq = gp_seq;
+ __entry->gp_seq = (long)gp_seq;
__entry->gpevent = gpevent;
),
- TP_printk("%s %lu %s",
+ TP_printk("%s %ld %s",
__entry->rcuname, __entry->gp_seq, __entry->gpevent)
);
@@ -114,8 +114,8 @@ TRACE_EVENT_RCU(rcu_future_grace_period,
TP_STRUCT__entry(
__field(const char *, rcuname)
- __field(unsigned long, gp_seq)
- __field(unsigned long, gp_seq_req)
+ __field(long, gp_seq)
+ __field(long, gp_seq_req)
__field(u8, level)
__field(int, grplo)
__field(int, grphi)
@@ -124,16 +124,16 @@ TRACE_EVENT_RCU(rcu_future_grace_period,
TP_fast_assign(
__entry->rcuname = rcuname;
- __entry->gp_seq = gp_seq;
- __entry->gp_seq_req = gp_seq_req;
+ __entry->gp_seq = (long)gp_seq;
+ __entry->gp_seq_req = (long)gp_seq_req;
__entry->level = level;
__entry->grplo = grplo;
__entry->grphi = grphi;
__entry->gpevent = gpevent;
),
- TP_printk("%s %lu %lu %u %d %d %s",
- __entry->rcuname, __entry->gp_seq, __entry->gp_seq_req, __entry->level,
+ TP_printk("%s %ld %ld %u %d %d %s",
+ __entry->rcuname, (long)__entry->gp_seq, (long)__entry->gp_seq_req, __entry->level,
__entry->grplo, __entry->grphi, __entry->gpevent)
);
@@ -153,7 +153,7 @@ TRACE_EVENT_RCU(rcu_grace_period_init,
TP_STRUCT__entry(
__field(const char *, rcuname)
- __field(unsigned long, gp_seq)
+ __field(long, gp_seq)
__field(u8, level)
__field(int, grplo)
__field(int, grphi)
@@ -162,14 +162,14 @@ TRACE_EVENT_RCU(rcu_grace_period_init,
TP_fast_assign(
__entry->rcuname = rcuname;
- __entry->gp_seq = gp_seq;
+ __entry->gp_seq = (long)gp_seq;
__entry->level = level;
__entry->grplo = grplo;
__entry->grphi = grphi;
__entry->qsmask = qsmask;
),
- TP_printk("%s %lu %u %d %d %lx",
+ TP_printk("%s %ld %u %d %d %lx",
__entry->rcuname, __entry->gp_seq, __entry->level,
__entry->grplo, __entry->grphi, __entry->qsmask)
);
@@ -197,17 +197,17 @@ TRACE_EVENT_RCU(rcu_exp_grace_period,
TP_STRUCT__entry(
__field(const char *, rcuname)
- __field(unsigned long, gpseq)
+ __field(long, gpseq)
__field(const char *, gpevent)
),
TP_fast_assign(
__entry->rcuname = rcuname;
- __entry->gpseq = gpseq;
+ __entry->gpseq = (long)gpseq;
__entry->gpevent = gpevent;
),
- TP_printk("%s %lu %s",
+ TP_printk("%s %ld %s",
__entry->rcuname, __entry->gpseq, __entry->gpevent)
);
@@ -316,17 +316,17 @@ TRACE_EVENT_RCU(rcu_preempt_task,
TP_STRUCT__entry(
__field(const char *, rcuname)
- __field(unsigned long, gp_seq)
+ __field(long, gp_seq)
__field(int, pid)
),
TP_fast_assign(
__entry->rcuname = rcuname;
- __entry->gp_seq = gp_seq;
+ __entry->gp_seq = (long)gp_seq;
__entry->pid = pid;
),
- TP_printk("%s %lu %d",
+ TP_printk("%s %ld %d",
__entry->rcuname, __entry->gp_seq, __entry->pid)
);
@@ -343,17 +343,17 @@ TRACE_EVENT_RCU(rcu_unlock_preempted_task,
TP_STRUCT__entry(
__field(const char *, rcuname)
- __field(unsigned long, gp_seq)
+ __field(long, gp_seq)
__field(int, pid)
),
TP_fast_assign(
__entry->rcuname = rcuname;
- __entry->gp_seq = gp_seq;
+ __entry->gp_seq = (long)gp_seq;
__entry->pid = pid;
),
- TP_printk("%s %lu %d", __entry->rcuname, __entry->gp_seq, __entry->pid)
+ TP_printk("%s %ld %d", __entry->rcuname, __entry->gp_seq, __entry->pid)
);
/*
@@ -374,7 +374,7 @@ TRACE_EVENT_RCU(rcu_quiescent_state_report,
TP_STRUCT__entry(
__field(const char *, rcuname)
- __field(unsigned long, gp_seq)
+ __field(long, gp_seq)
__field(unsigned long, mask)
__field(unsigned long, qsmask)
__field(u8, level)
@@ -385,7 +385,7 @@ TRACE_EVENT_RCU(rcu_quiescent_state_report,
TP_fast_assign(
__entry->rcuname = rcuname;
- __entry->gp_seq = gp_seq;
+ __entry->gp_seq = (long)gp_seq;
__entry->mask = mask;
__entry->qsmask = qsmask;
__entry->level = level;
@@ -394,7 +394,7 @@ TRACE_EVENT_RCU(rcu_quiescent_state_report,
__entry->gp_tasks = gp_tasks;
),
- TP_printk("%s %lu %lx>%lx %u %d %d %u",
+ TP_printk("%s %ld %lx>%lx %u %d %d %u",
__entry->rcuname, __entry->gp_seq,
__entry->mask, __entry->qsmask, __entry->level,
__entry->grplo, __entry->grphi, __entry->gp_tasks)
@@ -415,19 +415,19 @@ TRACE_EVENT_RCU(rcu_fqs,
TP_STRUCT__entry(
__field(const char *, rcuname)
- __field(unsigned long, gp_seq)
+ __field(long, gp_seq)
__field(int, cpu)
__field(const char *, qsevent)
),
TP_fast_assign(
__entry->rcuname = rcuname;
- __entry->gp_seq = gp_seq;
+ __entry->gp_seq = (long)gp_seq;
__entry->cpu = cpu;
__entry->qsevent = qsevent;
),
- TP_printk("%s %lu %d %s",
+ TP_printk("%s %ld %d %s",
__entry->rcuname, __entry->gp_seq,
__entry->cpu, __entry->qsevent)
);
diff --git a/include/trace/events/rdma.h b/include/trace/events/rdma.h
index aa19afc73a4e..81bb454fc288 100644
--- a/include/trace/events/rdma.h
+++ b/include/trace/events/rdma.h
@@ -6,7 +6,6 @@
/*
* enum ib_event_type, from include/rdma/ib_verbs.h
*/
-
#define IB_EVENT_LIST \
ib_event(CQ_ERR) \
ib_event(QP_FATAL) \
@@ -91,6 +90,46 @@ IB_WC_STATUS_LIST
__print_symbolic(x, IB_WC_STATUS_LIST)
/*
+ * enum ib_cm_event_type, from include/rdma/ib_cm.h
+ */
+#define IB_CM_EVENT_LIST \
+ ib_cm_event(REQ_ERROR) \
+ ib_cm_event(REQ_RECEIVED) \
+ ib_cm_event(REP_ERROR) \
+ ib_cm_event(REP_RECEIVED) \
+ ib_cm_event(RTU_RECEIVED) \
+ ib_cm_event(USER_ESTABLISHED) \
+ ib_cm_event(DREQ_ERROR) \
+ ib_cm_event(DREQ_RECEIVED) \
+ ib_cm_event(DREP_RECEIVED) \
+ ib_cm_event(TIMEWAIT_EXIT) \
+ ib_cm_event(MRA_RECEIVED) \
+ ib_cm_event(REJ_RECEIVED) \
+ ib_cm_event(LAP_ERROR) \
+ ib_cm_event(LAP_RECEIVED) \
+ ib_cm_event(APR_RECEIVED) \
+ ib_cm_event(SIDR_REQ_ERROR) \
+ ib_cm_event(SIDR_REQ_RECEIVED) \
+ ib_cm_event_end(SIDR_REP_RECEIVED)
+
+#undef ib_cm_event
+#undef ib_cm_event_end
+
+#define ib_cm_event(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_event_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_EVENT_LIST
+
+#undef ib_cm_event
+#undef ib_cm_event_end
+
+#define ib_cm_event(x) { IB_CM_##x, #x },
+#define ib_cm_event_end(x) { IB_CM_##x, #x }
+
+#define rdma_show_ib_cm_event(x) \
+ __print_symbolic(x, IB_CM_EVENT_LIST)
+
+/*
* enum rdma_cm_event_type, from include/rdma/rdma_cm.h
*/
#define RDMA_CM_EVENT_LIST \
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index abe942225637..bf1065772228 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -13,6 +13,7 @@
#include <linux/scatterlist.h>
#include <linux/sunrpc/rpc_rdma_cid.h>
#include <linux/tracepoint.h>
+#include <rdma/ib_cm.h>
#include <trace/events/rdma.h>
/**
@@ -423,7 +424,6 @@ DEFINE_CONN_EVENT(connect);
DEFINE_CONN_EVENT(disconnect);
DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc);
-DEFINE_RXPRT_EVENT(xprtrdma_op_setport);
TRACE_EVENT(xprtrdma_op_connect,
TP_PROTO(
@@ -1188,68 +1188,6 @@ TRACE_EVENT(xprtrdma_decode_seg,
);
/**
- ** Allocation/release of rpcrdma_reqs and rpcrdma_reps
- **/
-
-TRACE_EVENT(xprtrdma_op_allocate,
- TP_PROTO(
- const struct rpc_task *task,
- const struct rpcrdma_req *req
- ),
-
- TP_ARGS(task, req),
-
- TP_STRUCT__entry(
- __field(unsigned int, task_id)
- __field(unsigned int, client_id)
- __field(const void *, req)
- __field(size_t, callsize)
- __field(size_t, rcvsize)
- ),
-
- TP_fast_assign(
- __entry->task_id = task->tk_pid;
- __entry->client_id = task->tk_client->cl_clid;
- __entry->req = req;
- __entry->callsize = task->tk_rqstp->rq_callsize;
- __entry->rcvsize = task->tk_rqstp->rq_rcvsize;
- ),
-
- TP_printk("task:%u@%u req=%p (%zu, %zu)",
- __entry->task_id, __entry->client_id,
- __entry->req, __entry->callsize, __entry->rcvsize
- )
-);
-
-TRACE_EVENT(xprtrdma_op_free,
- TP_PROTO(
- const struct rpc_task *task,
- const struct rpcrdma_req *req
- ),
-
- TP_ARGS(task, req),
-
- TP_STRUCT__entry(
- __field(unsigned int, task_id)
- __field(unsigned int, client_id)
- __field(const void *, req)
- __field(const void *, rep)
- ),
-
- TP_fast_assign(
- __entry->task_id = task->tk_pid;
- __entry->client_id = task->tk_client->cl_clid;
- __entry->req = req;
- __entry->rep = req->rl_reply;
- ),
-
- TP_printk("task:%u@%u req=%p rep=%p",
- __entry->task_id, __entry->client_id,
- __entry->req, __entry->rep
- )
-);
-
-/**
** Callback events
**/
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 65d7dfbbc9cd..f45b3c01370c 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -259,8 +259,10 @@ DECLARE_EVENT_CLASS(rpc_task_status,
TP_ARGS(task))
DEFINE_RPC_STATUS_EVENT(call);
-DEFINE_RPC_STATUS_EVENT(bind);
DEFINE_RPC_STATUS_EVENT(connect);
+DEFINE_RPC_STATUS_EVENT(timeout);
+DEFINE_RPC_STATUS_EVENT(retry_refresh);
+DEFINE_RPC_STATUS_EVENT(refresh);
TRACE_EVENT(rpc_request,
TP_PROTO(const struct rpc_task *task),
@@ -385,7 +387,10 @@ DECLARE_EVENT_CLASS(rpc_task_running,
DEFINE_RPC_RUNNING_EVENT(begin);
DEFINE_RPC_RUNNING_EVENT(run_action);
+DEFINE_RPC_RUNNING_EVENT(sync_sleep);
+DEFINE_RPC_RUNNING_EVENT(sync_wake);
DEFINE_RPC_RUNNING_EVENT(complete);
+DEFINE_RPC_RUNNING_EVENT(timeout);
DEFINE_RPC_RUNNING_EVENT(signalled);
DEFINE_RPC_RUNNING_EVENT(end);
@@ -517,6 +522,49 @@ DEFINE_RPC_REPLY_EVENT(stale_creds);
DEFINE_RPC_REPLY_EVENT(bad_creds);
DEFINE_RPC_REPLY_EVENT(auth_tooweak);
+#define DEFINE_RPCB_ERROR_EVENT(name) \
+ DEFINE_EVENT(rpc_reply_event, rpcb_##name##_err, \
+ TP_PROTO( \
+ const struct rpc_task *task \
+ ), \
+ TP_ARGS(task))
+
+DEFINE_RPCB_ERROR_EVENT(prog_unavail);
+DEFINE_RPCB_ERROR_EVENT(timeout);
+DEFINE_RPCB_ERROR_EVENT(bind_version);
+DEFINE_RPCB_ERROR_EVENT(unreachable);
+DEFINE_RPCB_ERROR_EVENT(unrecognized);
+
+TRACE_EVENT(rpc_buf_alloc,
+ TP_PROTO(
+ const struct rpc_task *task,
+ int status
+ ),
+
+ TP_ARGS(task, status),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(size_t, callsize)
+ __field(size_t, recvsize)
+ __field(int, status)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ __entry->callsize = task->tk_rqstp->rq_callsize;
+ __entry->recvsize = task->tk_rqstp->rq_rcvsize;
+ __entry->status = status;
+ ),
+
+ TP_printk("task:%u@%u callsize=%zu recvsize=%zu status=%d",
+ __entry->task_id, __entry->client_id,
+ __entry->callsize, __entry->recvsize, __entry->status
+ )
+);
+
TRACE_EVENT(rpc_call_rpcerror,
TP_PROTO(
const struct rpc_task *task,
@@ -868,6 +916,34 @@ DEFINE_RPC_SOCKET_EVENT_DONE(rpc_socket_reset_connection);
DEFINE_RPC_SOCKET_EVENT(rpc_socket_close);
DEFINE_RPC_SOCKET_EVENT(rpc_socket_shutdown);
+TRACE_EVENT(rpc_socket_nospace,
+ TP_PROTO(
+ const struct rpc_rqst *rqst,
+ const struct sock_xprt *transport
+ ),
+
+ TP_ARGS(rqst, transport),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(unsigned int, total)
+ __field(unsigned int, remaining)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = rqst->rq_task->tk_pid;
+ __entry->client_id = rqst->rq_task->tk_client->cl_clid;
+ __entry->total = rqst->rq_slen;
+ __entry->remaining = rqst->rq_slen - transport->xmit.offset;
+ ),
+
+ TP_printk("task:%u@%u total=%u remaining=%u",
+ __entry->task_id, __entry->client_id,
+ __entry->total, __entry->remaining
+ )
+);
+
TRACE_DEFINE_ENUM(XPRT_LOCKED);
TRACE_DEFINE_ENUM(XPRT_CONNECTED);
TRACE_DEFINE_ENUM(XPRT_CONNECTING);
@@ -925,6 +1001,7 @@ DECLARE_EVENT_CLASS(rpc_xprt_lifetime_class,
TP_ARGS(xprt))
DEFINE_RPC_XPRT_LIFETIME_EVENT(create);
+DEFINE_RPC_XPRT_LIFETIME_EVENT(connect);
DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_auto);
DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_done);
DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_force);
@@ -969,7 +1046,6 @@ DECLARE_EVENT_CLASS(rpc_xprt_event,
DEFINE_RPC_XPRT_EVENT(timer);
DEFINE_RPC_XPRT_EVENT(lookup_rqst);
-DEFINE_RPC_XPRT_EVENT(complete_rqst);
TRACE_EVENT(xprt_transmit,
TP_PROTO(
@@ -1002,37 +1078,6 @@ TRACE_EVENT(xprt_transmit,
__entry->seqno, __entry->status)
);
-TRACE_EVENT(xprt_enq_xmit,
- TP_PROTO(
- const struct rpc_task *task,
- int stage
- ),
-
- TP_ARGS(task, stage),
-
- TP_STRUCT__entry(
- __field(unsigned int, task_id)
- __field(unsigned int, client_id)
- __field(u32, xid)
- __field(u32, seqno)
- __field(int, stage)
- ),
-
- TP_fast_assign(
- __entry->task_id = task->tk_pid;
- __entry->client_id = task->tk_client ?
- task->tk_client->cl_clid : -1;
- __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid);
- __entry->seqno = task->tk_rqstp->rq_seqno;
- __entry->stage = stage;
- ),
-
- TP_printk(
- "task:%u@%u xid=0x%08x seqno=%u stage=%d",
- __entry->task_id, __entry->client_id, __entry->xid,
- __entry->seqno, __entry->stage)
-);
-
TRACE_EVENT(xprt_ping,
TP_PROTO(const struct rpc_xprt *xprt, int status),
@@ -1095,6 +1140,7 @@ DECLARE_EVENT_CLASS(xprt_writelock_event,
DEFINE_WRITELOCK_EVENT(reserve_xprt);
DEFINE_WRITELOCK_EVENT(release_xprt);
+DEFINE_WRITELOCK_EVENT(transmit_queued);
DECLARE_EVENT_CLASS(xprt_cong_event,
TP_PROTO(
@@ -1147,6 +1193,30 @@ DEFINE_CONG_EVENT(release_cong);
DEFINE_CONG_EVENT(get_cong);
DEFINE_CONG_EVENT(put_cong);
+TRACE_EVENT(xprt_reserve,
+ TP_PROTO(
+ const struct rpc_rqst *rqst
+ ),
+
+ TP_ARGS(rqst),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, xid)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = rqst->rq_task->tk_pid;
+ __entry->client_id = rqst->rq_task->tk_client->cl_clid;
+ __entry->xid = be32_to_cpu(rqst->rq_xid);
+ ),
+
+ TP_printk("task:%u@%u xid=0x%08x",
+ __entry->task_id, __entry->client_id, __entry->xid
+ )
+);
+
TRACE_EVENT(xs_stream_read_data,
TP_PROTO(struct rpc_xprt *xprt, ssize_t err, size_t total),
@@ -1202,6 +1272,156 @@ TRACE_EVENT(xs_stream_read_request,
__entry->copied, __entry->reclen, __entry->offset)
);
+TRACE_EVENT(rpcb_getport,
+ TP_PROTO(
+ const struct rpc_clnt *clnt,
+ const struct rpc_task *task,
+ unsigned int bind_version
+ ),
+
+ TP_ARGS(clnt, task, bind_version),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(unsigned int, program)
+ __field(unsigned int, version)
+ __field(int, protocol)
+ __field(unsigned int, bind_version)
+ __string(servername, task->tk_xprt->servername)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = clnt->cl_clid;
+ __entry->program = clnt->cl_prog;
+ __entry->version = clnt->cl_vers;
+ __entry->protocol = task->tk_xprt->prot;
+ __entry->bind_version = bind_version;
+ __assign_str(servername, task->tk_xprt->servername);
+ ),
+
+ TP_printk("task:%u@%u server=%s program=%u version=%u protocol=%d bind_version=%u",
+ __entry->task_id, __entry->client_id, __get_str(servername),
+ __entry->program, __entry->version, __entry->protocol,
+ __entry->bind_version
+ )
+);
+
+TRACE_EVENT(rpcb_setport,
+ TP_PROTO(
+ const struct rpc_task *task,
+ int status,
+ unsigned short port
+ ),
+
+ TP_ARGS(task, status, port),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(int, status)
+ __field(unsigned short, port)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ __entry->status = status;
+ __entry->port = port;
+ ),
+
+ TP_printk("task:%u@%u status=%d port=%u",
+ __entry->task_id, __entry->client_id,
+ __entry->status, __entry->port
+ )
+);
+
+TRACE_EVENT(pmap_register,
+ TP_PROTO(
+ u32 program,
+ u32 version,
+ int protocol,
+ unsigned short port
+ ),
+
+ TP_ARGS(program, version, protocol, port),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, program)
+ __field(unsigned int, version)
+ __field(int, protocol)
+ __field(unsigned int, port)
+ ),
+
+ TP_fast_assign(
+ __entry->program = program;
+ __entry->version = version;
+ __entry->protocol = protocol;
+ __entry->port = port;
+ ),
+
+ TP_printk("program=%u version=%u protocol=%d port=%u",
+ __entry->program, __entry->version,
+ __entry->protocol, __entry->port
+ )
+);
+
+TRACE_EVENT(rpcb_register,
+ TP_PROTO(
+ u32 program,
+ u32 version,
+ const char *addr,
+ const char *netid
+ ),
+
+ TP_ARGS(program, version, addr, netid),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, program)
+ __field(unsigned int, version)
+ __string(addr, addr)
+ __string(netid, netid)
+ ),
+
+ TP_fast_assign(
+ __entry->program = program;
+ __entry->version = version;
+ __assign_str(addr, addr);
+ __assign_str(netid, netid);
+ ),
+
+ TP_printk("program=%u version=%u addr=%s netid=%s",
+ __entry->program, __entry->version,
+ __get_str(addr), __get_str(netid)
+ )
+);
+
+TRACE_EVENT(rpcb_unregister,
+ TP_PROTO(
+ u32 program,
+ u32 version,
+ const char *netid
+ ),
+
+ TP_ARGS(program, version, netid),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, program)
+ __field(unsigned int, version)
+ __string(netid, netid)
+ ),
+
+ TP_fast_assign(
+ __entry->program = program;
+ __entry->version = version;
+ __assign_str(netid, netid);
+ ),
+
+ TP_printk("program=%u version=%u netid=%s",
+ __entry->program, __entry->version, __get_str(netid)
+ )
+);
DECLARE_EVENT_CLASS(svc_xdr_buf_class,
TP_PROTO(
diff --git a/include/uapi/asm-generic/hugetlb_encode.h b/include/uapi/asm-generic/hugetlb_encode.h
index b0f8e87235bd..4f3d5aaa11f5 100644
--- a/include/uapi/asm-generic/hugetlb_encode.h
+++ b/include/uapi/asm-generic/hugetlb_encode.h
@@ -20,6 +20,7 @@
#define HUGETLB_FLAG_ENCODE_SHIFT 26
#define HUGETLB_FLAG_ENCODE_MASK 0x3f
+#define HUGETLB_FLAG_ENCODE_16KB (14 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT)
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index f2b5d72a46c2..2056318988f7 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -857,9 +857,11 @@ __SYSCALL(__NR_openat2, sys_openat2)
__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
#define __NR_faccessat2 439
__SYSCALL(__NR_faccessat2, sys_faccessat2)
+#define __NR_process_madvise 440
+__SYSCALL(__NR_process_madvise, sys_process_madvise)
#undef __NR_syscalls
-#define __NR_syscalls 440
+#define __NR_syscalls 441
/*
* 32 bit systems traditionally used different
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 373cada89815..7233502ea991 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -172,6 +172,9 @@
* - add FUSE_WRITE_KILL_PRIV flag
* - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING
* - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag
+ *
+ * 7.32
+ * - add flags to fuse_attr, add FUSE_ATTR_SUBMOUNT, add FUSE_SUBMOUNTS
*/
#ifndef _LINUX_FUSE_H
@@ -207,7 +210,7 @@
#define FUSE_KERNEL_VERSION 7
/** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 31
+#define FUSE_KERNEL_MINOR_VERSION 32
/** The node ID of the root inode */
#define FUSE_ROOT_ID 1
@@ -231,7 +234,7 @@ struct fuse_attr {
uint32_t gid;
uint32_t rdev;
uint32_t blksize;
- uint32_t padding;
+ uint32_t flags;
};
struct fuse_kstatfs {
@@ -313,7 +316,10 @@ struct fuse_file_lock {
* FUSE_CACHE_SYMLINKS: cache READLINK responses
* FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir
* FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request
- * FUSE_MAP_ALIGNMENT: map_alignment field is valid
+ * FUSE_MAP_ALIGNMENT: init_out.map_alignment contains log2(byte alignment) for
+ * foffset and moffset fields in struct
+ * fuse_setupmapping_out and fuse_removemapping_one.
+ * FUSE_SUBMOUNTS: kernel supports auto-mounting directory submounts
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
@@ -342,6 +348,7 @@ struct fuse_file_lock {
#define FUSE_NO_OPENDIR_SUPPORT (1 << 24)
#define FUSE_EXPLICIT_INVAL_DATA (1 << 25)
#define FUSE_MAP_ALIGNMENT (1 << 26)
+#define FUSE_SUBMOUNTS (1 << 27)
/**
* CUSE INIT request/reply flags
@@ -417,6 +424,13 @@ struct fuse_file_lock {
*/
#define FUSE_FSYNC_FDATASYNC (1 << 0)
+/**
+ * fuse_attr flags
+ *
+ * FUSE_ATTR_SUBMOUNT: Object is a submount root
+ */
+#define FUSE_ATTR_SUBMOUNT (1 << 0)
+
enum fuse_opcode {
FUSE_LOOKUP = 1,
FUSE_FORGET = 2, /* no reply */
@@ -892,4 +906,34 @@ struct fuse_copy_file_range_in {
uint64_t flags;
};
+#define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0)
+#define FUSE_SETUPMAPPING_FLAG_READ (1ull << 1)
+struct fuse_setupmapping_in {
+ /* An already open handle */
+ uint64_t fh;
+ /* Offset into the file to start the mapping */
+ uint64_t foffset;
+ /* Length of mapping required */
+ uint64_t len;
+ /* Flags, FUSE_SETUPMAPPING_FLAG_* */
+ uint64_t flags;
+ /* Offset in Memory Window */
+ uint64_t moffset;
+};
+
+struct fuse_removemapping_in {
+ /* number of fuse_removemapping_one follows */
+ uint32_t count;
+};
+
+struct fuse_removemapping_one {
+ /* Offset into the dax window start the unmapping */
+ uint64_t moffset;
+ /* Length of mapping required */
+ uint64_t len;
+};
+
+#define FUSE_REMOVEMAPPING_MAX_ENTRY \
+ (PAGE_SIZE / sizeof(struct fuse_removemapping_one))
+
#endif /* _LINUX_FUSE_H */
diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h
index 923cc162609c..f55bc680b5b0 100644
--- a/include/uapi/linux/mman.h
+++ b/include/uapi/linux/mman.h
@@ -27,6 +27,7 @@
#define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT
#define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK
+#define MAP_HUGE_16KB HUGETLB_FLAG_ENCODE_16KB
#define MAP_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB
#define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB
#define MAP_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB
diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h
index bf197e99b98f..ed5415e0f1c1 100644
--- a/include/uapi/linux/nfs4.h
+++ b/include/uapi/linux/nfs4.h
@@ -139,6 +139,8 @@
#define EXCHGID4_FLAG_UPD_CONFIRMED_REC_A 0x40000000
#define EXCHGID4_FLAG_CONFIRMED_R 0x80000000
+
+#define EXCHGID4_FLAG_SUPP_FENCE_OPS 0x00000004
/*
* Since the validity of these bits depends on whether
* they're set in the argument or response, have separate
@@ -146,6 +148,7 @@
*/
#define EXCHGID4_FLAG_MASK_A 0x40070103
#define EXCHGID4_FLAG_MASK_R 0x80070103
+#define EXCHGID4_2_FLAG_MASK_R 0x80070107
#define SEQ4_STATUS_CB_PATH_DOWN 0x00000001
#define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING 0x00000002
diff --git a/include/uapi/linux/virtio_fs.h b/include/uapi/linux/virtio_fs.h
index 3056b6e9f8ce..bea38291421b 100644
--- a/include/uapi/linux/virtio_fs.h
+++ b/include/uapi/linux/virtio_fs.h
@@ -16,4 +16,7 @@ struct virtio_fs_config {
__le32 num_request_queues;
} __attribute__((packed));
+/* For the id field in virtio_pci_shm_cap */
+#define VIRTIO_FS_SHMCAP_ID_CACHE 0
+
#endif /* _UAPI_LINUX_VIRTIO_FS_H */
diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h
index 507a2862bedb..f89fbb5b1e8d 100644
--- a/include/uapi/rdma/efa-abi.h
+++ b/include/uapi/rdma/efa-abi.h
@@ -105,6 +105,7 @@ struct efa_ibv_create_ah_resp {
enum {
EFA_QUERY_DEVICE_CAPS_RDMA_READ = 1 << 0,
+ EFA_QUERY_DEVICE_CAPS_RNR_RETRY = 1 << 1,
};
struct efa_ibv_ex_query_device_resp {
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index eb76b38a00d4..9ec85f76e9ac 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -39,6 +39,8 @@
struct hns_roce_ib_create_cq {
__aligned_u64 buf_addr;
__aligned_u64 db_addr;
+ __u32 cqe_size;
+ __u32 reserved;
};
struct hns_roce_ib_create_cq_resp {
@@ -73,7 +75,7 @@ struct hns_roce_ib_create_qp_resp {
struct hns_roce_ib_alloc_ucontext_resp {
__u32 qp_tab_size;
- __u32 reserved;
+ __u32 cqe_size;
};
struct hns_roce_ib_alloc_pd_resp {
diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h
index 99dcabf61a71..7968a1845355 100644
--- a/include/uapi/rdma/ib_user_ioctl_cmds.h
+++ b/include/uapi/rdma/ib_user_ioctl_cmds.h
@@ -70,6 +70,8 @@ enum uverbs_methods_device {
UVERBS_METHOD_QUERY_PORT,
UVERBS_METHOD_GET_CONTEXT,
UVERBS_METHOD_QUERY_CONTEXT,
+ UVERBS_METHOD_QUERY_GID_TABLE,
+ UVERBS_METHOD_QUERY_GID_ENTRY,
};
enum uverbs_attrs_invoke_write_cmd_attr_ids {
@@ -352,4 +354,18 @@ enum uverbs_attrs_async_event_create {
UVERBS_ATTR_ASYNC_EVENT_ALLOC_FD_HANDLE,
};
+enum uverbs_attrs_query_gid_table_cmd_attr_ids {
+ UVERBS_ATTR_QUERY_GID_TABLE_ENTRY_SIZE,
+ UVERBS_ATTR_QUERY_GID_TABLE_FLAGS,
+ UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+ UVERBS_ATTR_QUERY_GID_TABLE_RESP_NUM_ENTRIES,
+};
+
+enum uverbs_attrs_query_gid_entry_cmd_attr_ids {
+ UVERBS_ATTR_QUERY_GID_ENTRY_PORT,
+ UVERBS_ATTR_QUERY_GID_ENTRY_GID_INDEX,
+ UVERBS_ATTR_QUERY_GID_ENTRY_FLAGS,
+ UVERBS_ATTR_QUERY_GID_ENTRY_RESP_ENTRY,
+};
+
#endif
diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h
index 5debab45ebcb..22483799cd07 100644
--- a/include/uapi/rdma/ib_user_ioctl_verbs.h
+++ b/include/uapi/rdma/ib_user_ioctl_verbs.h
@@ -208,6 +208,7 @@ enum ib_uverbs_read_counters_flags {
enum ib_uverbs_advise_mr_advice {
IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH,
IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE,
+ IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT,
};
enum ib_uverbs_advise_mr_flag {
@@ -250,4 +251,18 @@ enum rdma_driver_id {
RDMA_DRIVER_SIW,
};
+enum ib_uverbs_gid_type {
+ IB_UVERBS_GID_TYPE_IB,
+ IB_UVERBS_GID_TYPE_ROCE_V1,
+ IB_UVERBS_GID_TYPE_ROCE_V2,
+};
+
+struct ib_uverbs_gid_entry {
+ __aligned_u64 gid[2];
+ __u32 gid_index;
+ __u32 port_num;
+ __u32 gid_type;
+ __u32 netdev_ifindex; /* It is 0 if there is no netdev associated with it */
+};
+
#endif
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 0474c7400268..456438c18c2c 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -457,6 +457,17 @@ struct ib_uverbs_poll_cq {
__u32 ne;
};
+enum ib_uverbs_wc_opcode {
+ IB_UVERBS_WC_SEND = 0,
+ IB_UVERBS_WC_RDMA_WRITE = 1,
+ IB_UVERBS_WC_RDMA_READ = 2,
+ IB_UVERBS_WC_COMP_SWAP = 3,
+ IB_UVERBS_WC_FETCH_ADD = 4,
+ IB_UVERBS_WC_BIND_MW = 5,
+ IB_UVERBS_WC_LOCAL_INV = 6,
+ IB_UVERBS_WC_TSO = 7,
+};
+
struct ib_uverbs_wc {
__aligned_u64 wr_id;
__u32 status;
diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h
index aae2e696bb38..e591d8c1f3cf 100644
--- a/include/uapi/rdma/rdma_user_rxe.h
+++ b/include/uapi/rdma/rdma_user_rxe.h
@@ -39,6 +39,11 @@
#include <linux/in.h>
#include <linux/in6.h>
+enum {
+ RXE_NETWORK_TYPE_IPV4 = 1,
+ RXE_NETWORK_TYPE_IPV6 = 2,
+};
+
union rxe_gid {
__u8 raw[16];
struct {
@@ -57,6 +62,7 @@ struct rxe_global_route {
struct rxe_av {
__u8 port_num;
+ /* From RXE_NETWORK_TYPE_* */
__u8 network_type;
__u8 dmac[6];
struct rxe_global_route grh;
@@ -99,8 +105,8 @@ struct rxe_send_wr {
struct ib_mr *mr;
__aligned_u64 reserved;
};
- __u32 key;
- __u32 access;
+ __u32 key;
+ __u32 access;
} reg;
} wr;
};
@@ -112,7 +118,7 @@ struct rxe_sge {
};
struct mminfo {
- __aligned_u64 offset;
+ __aligned_u64 offset;
__u32 size;
__u32 pad;
};
diff --git a/include/xen/events.h b/include/xen/events.h
index df1e6391f63f..3b8155c2ea03 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -15,10 +15,15 @@
unsigned xen_evtchn_nr_channels(void);
int bind_evtchn_to_irq(evtchn_port_t evtchn);
+int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn);
int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
irq_handler_t handler,
unsigned long irqflags, const char *devname,
void *dev_id);
+int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn,
+ irq_handler_t handler,
+ unsigned long irqflags, const char *devname,
+ void *dev_id);
int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu);
int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
irq_handler_t handler,
@@ -32,12 +37,20 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
void *dev_id);
int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
evtchn_port_t remote_port);
+int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain,
+ evtchn_port_t remote_port);
int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
evtchn_port_t remote_port,
irq_handler_t handler,
unsigned long irqflags,
const char *devname,
void *dev_id);
+int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain,
+ evtchn_port_t remote_port,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id);
/*
* Common unbind function for all event sources. Takes IRQ to unbind from.
@@ -46,6 +59,14 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
*/
void unbind_from_irqhandler(unsigned int irq, void *dev_id);
+/*
+ * Send late EOI for an IRQ bound to an event channel via one of the *_lateeoi
+ * functions above.
+ */
+void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags);
+/* Signal an event was spurious, i.e. there was no action resulting from it. */
+#define XEN_EOI_FLAG_SPURIOUS 0x00000001
+
#define XEN_IRQ_PRIORITY_MAX EVTCHN_FIFO_PRIORITY_MAX
#define XEN_IRQ_PRIORITY_DEFAULT EVTCHN_FIFO_PRIORITY_DEFAULT
#define XEN_IRQ_PRIORITY_MIN EVTCHN_FIFO_PRIORITY_MIN
diff --git a/init/main.c b/init/main.c
index 1af84337cb18..130376ec10ba 100644
--- a/init/main.c
+++ b/init/main.c
@@ -108,6 +108,8 @@
#define CREATE_TRACE_POINTS
#include <trace/events/initcall.h>
+#include <kunit/test.h>
+
static int kernel_init(void *);
extern void init_IRQ(void);
@@ -1513,6 +1515,8 @@ static noinline void __init kernel_init_freeable(void)
do_basic_setup();
+ kunit_run_all_tests();
+
console_on_rootfs();
/*
diff --git a/kernel/Makefile b/kernel/Makefile
index e5bc66a94b70..2044ef02763b 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -134,6 +134,8 @@ KASAN_SANITIZE_stackleak.o := n
KCSAN_SANITIZE_stackleak.o := n
KCOV_INSTRUMENT_stackleak.o := n
+obj-$(CONFIG_SCF_TORTURE_TEST) += scftorture.o
+
$(obj)/configs.o: $(obj)/config_data.gz
targets += config_data.gz
diff --git a/kernel/acct.c b/kernel/acct.c
index b0c5b3a9f5af..f175df8f6aa4 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -25,7 +25,7 @@
* Now we silently close acct_file on attempt to reopen. Cleaned sys_acct().
* XTerms and EMACS are manifestations of pure evil. 21/10/98, AV.
*
- * Fixed a nasty interaction with with sys_umount(). If the accointing
+ * Fixed a nasty interaction with sys_umount(). If the accounting
* was suspeneded we failed to stop it on umount(). Messy.
* Another one: remount to readonly didn't stop accounting.
* Question: what should we do if we have CAP_SYS_ADMIN but not
@@ -263,12 +263,12 @@ static DEFINE_MUTEX(acct_on_mutex);
* sys_acct - enable/disable process accounting
* @name: file name for accounting records or NULL to shutdown accounting
*
- * Returns 0 for success or negative errno values for failure.
- *
* sys_acct() is the only system call needed to implement process
* accounting. It takes the name of the file where accounting records
* should be written. If the filename is NULL, accounting will be
* shutdown.
+ *
+ * Returns: 0 for success or negative errno values for failure.
*/
SYSCALL_DEFINE1(acct, const char __user *, name)
{
@@ -586,9 +586,7 @@ static void slow_acct_process(struct pid_namespace *ns)
}
/**
- * acct_process
- *
- * handles process accounting for an exiting task
+ * acct_process - handles process accounting for an exiting task
*/
void acct_process(void)
{
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 642415b8c3c9..57b5b5d0a5fd 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -390,7 +390,7 @@ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
* The top cpuset doesn't have any online cpu as a
* consequence of a race between cpuset_hotplug_work
* and cpu hotplug notifier. But we know the top
- * cpuset's effective_cpus is on its way to to be
+ * cpuset's effective_cpus is on its way to be
* identical to cpu_online_mask.
*/
cpumask_copy(pmask, cpu_online_mask);
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index b16dbc1bf056..1e75a8923a8d 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -80,7 +80,7 @@ static int exception_level;
struct kgdb_io *dbg_io_ops;
static DEFINE_SPINLOCK(kgdb_registration_lock);
-/* Action for the reboot notifiter, a global allow kdb to change it */
+/* Action for the reboot notifier, a global allow kdb to change it */
static int kgdbreboot;
/* kgdb console driver is loaded */
static int kgdb_con_registered;
@@ -94,14 +94,6 @@ int dbg_switch_cpu;
/* Use kdb or gdbserver mode */
int dbg_kdb_mode = 1;
-static int __init opt_kgdb_con(char *str)
-{
- kgdb_use_con = 1;
- return 0;
-}
-
-early_param("kgdbcon", opt_kgdb_con);
-
module_param(kgdb_use_con, int, 0644);
module_param(kgdbreboot, int, 0644);
@@ -163,7 +155,7 @@ early_param("nokgdbroundup", opt_nokgdbroundup);
/*
* Weak aliases for breakpoint management,
- * can be overriden by architectures when needed:
+ * can be overridden by architectures when needed:
*/
int __weak kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
{
@@ -177,17 +169,23 @@ int __weak kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
return err;
}
+NOKPROBE_SYMBOL(kgdb_arch_set_breakpoint);
int __weak kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
{
return copy_to_kernel_nofault((char *)bpt->bpt_addr,
(char *)bpt->saved_instr, BREAK_INSTR_SIZE);
}
+NOKPROBE_SYMBOL(kgdb_arch_remove_breakpoint);
int __weak kgdb_validate_break_address(unsigned long addr)
{
struct kgdb_bkpt tmp;
int err;
+
+ if (kgdb_within_blocklist(addr))
+ return -EINVAL;
+
/* Validate setting the breakpoint and then removing it. If the
* remove fails, the kernel needs to emit a bad message because we
* are deep trouble not being able to put things back the way we
@@ -208,6 +206,7 @@ unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs)
{
return instruction_pointer(regs);
}
+NOKPROBE_SYMBOL(kgdb_arch_pc);
int __weak kgdb_arch_init(void)
{
@@ -218,6 +217,7 @@ int __weak kgdb_skipexception(int exception, struct pt_regs *regs)
{
return 0;
}
+NOKPROBE_SYMBOL(kgdb_skipexception);
#ifdef CONFIG_SMP
@@ -239,6 +239,7 @@ void __weak kgdb_call_nmi_hook(void *ignored)
*/
kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
}
+NOKPROBE_SYMBOL(kgdb_call_nmi_hook);
void __weak kgdb_roundup_cpus(void)
{
@@ -272,6 +273,7 @@ void __weak kgdb_roundup_cpus(void)
kgdb_info[cpu].rounding_up = false;
}
}
+NOKPROBE_SYMBOL(kgdb_roundup_cpus);
#endif
@@ -298,6 +300,7 @@ static void kgdb_flush_swbreak_addr(unsigned long addr)
/* Force flush instruction cache if it was outside the mm */
flush_icache_range(addr, addr + BREAK_INSTR_SIZE);
}
+NOKPROBE_SYMBOL(kgdb_flush_swbreak_addr);
/*
* SW breakpoint management:
@@ -325,6 +328,7 @@ int dbg_activate_sw_breakpoints(void)
}
return ret;
}
+NOKPROBE_SYMBOL(dbg_activate_sw_breakpoints);
int dbg_set_sw_break(unsigned long addr)
{
@@ -388,6 +392,7 @@ int dbg_deactivate_sw_breakpoints(void)
}
return ret;
}
+NOKPROBE_SYMBOL(dbg_deactivate_sw_breakpoints);
int dbg_remove_sw_break(unsigned long addr)
{
@@ -509,6 +514,7 @@ static int kgdb_io_ready(int print_wait)
}
return 1;
}
+NOKPROBE_SYMBOL(kgdb_io_ready);
static int kgdb_reenter_check(struct kgdb_state *ks)
{
@@ -556,6 +562,7 @@ static int kgdb_reenter_check(struct kgdb_state *ks)
return 1;
}
+NOKPROBE_SYMBOL(kgdb_reenter_check);
static void dbg_touch_watchdogs(void)
{
@@ -563,6 +570,7 @@ static void dbg_touch_watchdogs(void)
clocksource_touch_watchdog();
rcu_cpu_stall_reset();
}
+NOKPROBE_SYMBOL(dbg_touch_watchdogs);
static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs,
int exception_state)
@@ -752,6 +760,8 @@ cpu_master_loop:
}
}
+ dbg_activate_sw_breakpoints();
+
/* Call the I/O driver's post_exception routine */
if (dbg_io_ops->post_exception)
dbg_io_ops->post_exception();
@@ -794,6 +804,7 @@ kgdb_restore:
return kgdb_info[cpu].ret_state;
}
+NOKPROBE_SYMBOL(kgdb_cpu_enter);
/*
* kgdb_handle_exception() - main entry point from a kernel exception
@@ -838,6 +849,7 @@ out:
arch_kgdb_ops.enable_nmi(1);
return ret;
}
+NOKPROBE_SYMBOL(kgdb_handle_exception);
/*
* GDB places a breakpoint at this function to know dynamically loaded objects.
@@ -872,6 +884,7 @@ int kgdb_nmicallback(int cpu, void *regs)
#endif
return 1;
}
+NOKPROBE_SYMBOL(kgdb_nmicallback);
int kgdb_nmicallin(int cpu, int trapnr, void *regs, int err_code,
atomic_t *send_ready)
@@ -897,6 +910,7 @@ int kgdb_nmicallin(int cpu, int trapnr, void *regs, int err_code,
#endif
return 1;
}
+NOKPROBE_SYMBOL(kgdb_nmicallin);
static void kgdb_console_write(struct console *co, const char *s,
unsigned count)
@@ -920,6 +934,20 @@ static struct console kgdbcons = {
.index = -1,
};
+static int __init opt_kgdb_con(char *str)
+{
+ kgdb_use_con = 1;
+
+ if (kgdb_io_module_registered && !kgdb_con_registered) {
+ register_console(&kgdbcons);
+ kgdb_con_registered = 1;
+ }
+
+ return 0;
+}
+
+early_param("kgdbcon", opt_kgdb_con);
+
#ifdef CONFIG_MAGIC_SYSRQ
static void sysrq_handle_dbg(int key)
{
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index cc3c43dfec44..a77df59d9ca5 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -725,7 +725,7 @@ static void gdb_cmd_query(struct kgdb_state *ks)
}
}
- do_each_thread(g, p) {
+ for_each_process_thread(g, p) {
if (i >= ks->thr_query && !finished) {
int_to_threadref(thref, p->pid);
ptr = pack_threadid(ptr, thref);
@@ -735,7 +735,7 @@ static void gdb_cmd_query(struct kgdb_state *ks)
finished = 1;
}
i++;
- } while_each_thread(g, p);
+ }
*(--ptr) = '\0';
break;
@@ -1061,7 +1061,6 @@ int gdb_serial_stub(struct kgdb_state *ks)
error_packet(remcom_out_buffer, -EINVAL);
break;
}
- dbg_activate_sw_breakpoints();
fallthrough; /* to default processing */
default:
default_handle:
diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c
index d7ebb2c79cb8..ec4940146612 100644
--- a/kernel/debug/kdb/kdb_bp.c
+++ b/kernel/debug/kdb/kdb_bp.c
@@ -307,6 +307,15 @@ static int kdb_bp(int argc, const char **argv)
return KDB_BADINT;
/*
+ * This check is redundant (since the breakpoint machinery should
+ * be doing the same check during kdb_bp_install) but gives the
+ * user immediate feedback.
+ */
+ diag = kgdb_validate_break_address(template.bp_addr);
+ if (diag)
+ return diag;
+
+ /*
* Find an empty bp structure to allocate
*/
for (bpno = 0, bp = kdb_breakpoints; bpno < KDB_MAXBPT; bpno++, bp++) {
diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c
index 18e03aba2cfc..1f9f0e47aeda 100644
--- a/kernel/debug/kdb/kdb_bt.c
+++ b/kernel/debug/kdb/kdb_bt.c
@@ -149,14 +149,14 @@ kdb_bt(int argc, const char **argv)
return 0;
}
/* Now the inactive tasks */
- kdb_do_each_thread(g, p) {
+ for_each_process_thread(g, p) {
if (KDB_FLAG(CMD_INTERRUPT))
return 0;
if (task_curr(p))
continue;
if (kdb_bt1(p, mask, btaprompt))
return 0;
- } kdb_while_each_thread(g, p);
+ }
} else if (strcmp(argv[0], "btp") == 0) {
struct task_struct *p;
unsigned long pid;
diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c
index 53a0df6e4d92..0220afda3200 100644
--- a/kernel/debug/kdb/kdb_debugger.c
+++ b/kernel/debug/kdb/kdb_debugger.c
@@ -147,7 +147,6 @@ int kdb_stub(struct kgdb_state *ks)
return DBG_PASS_EVENT;
}
kdb_bp_install(ks->linux_regs);
- dbg_activate_sw_breakpoints();
/* Set the exit state to a single step or a continue */
if (KDB_STATE(DOING_SS))
gdbstub_state(ks, "s");
@@ -167,7 +166,6 @@ int kdb_stub(struct kgdb_state *ks)
* differently vs the gdbstub
*/
kgdb_single_step = 0;
- dbg_deactivate_sw_breakpoints();
return DBG_SWITCH_CPU_EVENT;
}
return kgdb_info[ks->cpu].ret_state;
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 9d847ab851db..6735ac36b718 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -545,18 +545,18 @@ static int kdb_search_string(char *searched, char *searchfor)
static void kdb_msg_write(const char *msg, int msg_len)
{
struct console *c;
+ const char *cp;
+ int len;
if (msg_len == 0)
return;
- if (dbg_io_ops) {
- const char *cp = msg;
- int len = msg_len;
+ cp = msg;
+ len = msg_len;
- while (len--) {
- dbg_io_ops->write_char(*cp);
- cp++;
- }
+ while (len--) {
+ dbg_io_ops->write_char(*cp);
+ cp++;
}
for_each_console(c) {
@@ -706,12 +706,16 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
size_avail = sizeof(kdb_buffer) - len;
goto kdb_print_out;
}
- if (kdb_grepping_flag >= KDB_GREPPING_FLAG_SEARCH)
+ if (kdb_grepping_flag >= KDB_GREPPING_FLAG_SEARCH) {
/*
* This was a interactive search (using '/' at more
- * prompt) and it has completed. Clear the flag.
+ * prompt) and it has completed. Replace the \0 with
+ * its original value to ensure multi-line strings
+ * are handled properly, and return to normal mode.
*/
+ *cphold = replaced_byte;
kdb_grepping_flag = 0;
+ }
/*
* at this point the string is a full line and
* should be printed, up to the null.
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 5c7949061671..930ac1b25ec7 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -2299,10 +2299,10 @@ void kdb_ps_suppressed(void)
if (kdb_task_state(p, mask_I))
++idle;
}
- kdb_do_each_thread(g, p) {
+ for_each_process_thread(g, p) {
if (kdb_task_state(p, mask_M))
++daemon;
- } kdb_while_each_thread(g, p);
+ }
if (idle || daemon) {
if (idle)
kdb_printf("%d idle process%s (state I)%s\n",
@@ -2370,12 +2370,12 @@ static int kdb_ps(int argc, const char **argv)
}
kdb_printf("\n");
/* Now the real tasks */
- kdb_do_each_thread(g, p) {
+ for_each_process_thread(g, p) {
if (KDB_FLAG(CMD_INTERRUPT))
return 0;
if (kdb_task_state(p, mask))
kdb_ps1(p);
- } kdb_while_each_thread(g, p);
+ }
return 0;
}
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index 2e296e4a234c..a4281fb99299 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
@@ -230,10 +230,6 @@ extern struct task_struct *kdb_curr_task(int);
#define kdb_task_has_cpu(p) (task_curr(p))
-/* Simplify coexistence with NPTL */
-#define kdb_do_each_thread(g, p) do_each_thread(g, p)
-#define kdb_while_each_thread(g, p) while_each_thread(g, p)
-
#define GFP_KDB (in_interrupt() ? GFP_ATOMIC : GFP_KERNEL)
extern void *debug_kmalloc(size_t size, gfp_t flags);
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index b92d08e65999..06c111544f61 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -16,7 +16,7 @@
#include "direct.h"
/*
- * Most architectures use ZONE_DMA for the first 16 Megabytes, but some use it
+ * Most architectures use ZONE_DMA for the first 16 Megabytes, but some use
* it for entirely different regions. In that case the arch code needs to
* override the variable below for dma-direct to work properly.
*/
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 145ab11b8318..0a1e20f8d4e8 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -304,7 +304,7 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
* terminate a grace period, if and only if the timer interrupt is
* not nested into another interrupt.
*
- * Checking for __rcu_is_watching() here would prevent the nesting
+ * Checking for rcu_is_watching() here would prevent the nesting
* interrupt to invoke rcu_irq_enter(). If that nested interrupt is
* the tick then rcu_flavor_sched_clock_irq() would wrongfully
* assume that it is the first interupt and eventually claim
diff --git a/kernel/exit.c b/kernel/exit.c
index 1f51c27bae59..87a2d515de0d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1474,25 +1474,6 @@ end:
return retval;
}
-static struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags)
-{
- struct fd f;
- struct pid *pid;
-
- f = fdget(fd);
- if (!f.file)
- return ERR_PTR(-EBADF);
-
- pid = pidfd_pid(f.file);
- if (!IS_ERR(pid)) {
- get_pid(pid);
- *flags = f.file->f_flags;
- }
-
- fdput(f);
- return pid;
-}
-
static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
int options, struct rusage *ru)
{
diff --git a/kernel/fork.c b/kernel/fork.c
index 3ca8f1f83fb3..32083db7a2a2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -556,7 +556,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
get_file(file);
if (tmp->vm_flags & VM_DENYWRITE)
- atomic_dec(&inode->i_writecount);
+ put_write_access(inode);
i_mmap_lock_write(mapping);
if (tmp->vm_flags & VM_SHARED)
mapping_allow_writable(mapping);
@@ -2189,7 +2189,7 @@ static __latent_entropy struct task_struct *copy_process(
/*
* Ensure that the cgroup subsystem policies allow the new process to be
- * forked. It should be noted the the new process's css_set can be changed
+ * forked. It should be noted that the new process's css_set can be changed
* between here and cgroup_post_fork() if an organisation operation is in
* progress.
*/
diff --git a/kernel/futex.c b/kernel/futex.c
index a5876694a60e..680854dcf156 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -916,7 +916,7 @@ static inline void exit_pi_state_list(struct task_struct *curr) { }
* [10] Found | Found | task | !=taskTID | 0/1 | Invalid
*
* [1] Indicates that the kernel can acquire the futex atomically. We
- * came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
+ * came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
*
* [2] Valid, if TID does not belong to a kernel thread. If no matching
* thread is found then it indicates that the owner TID has died.
diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c
index e960d7ce7bcc..773b6105c4ae 100644
--- a/kernel/irq/timings.c
+++ b/kernel/irq/timings.c
@@ -604,7 +604,7 @@ int irq_timings_alloc(int irq)
/*
* Some platforms can have the same private interrupt per cpu,
- * so this function may be be called several times with the
+ * so this function may be called several times with the
* same interrupt number. Just bail out in case the per cpu
* stat structure is already allocated.
*/
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index e661c61b3d6b..015ef903ce8c 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -19,7 +19,7 @@
#include <linux/cpu.h>
#include <asm/sections.h>
-/* mutex to protect coming/going of the the jump_label table */
+/* mutex to protect coming/going of the jump_label table */
static DEFINE_MUTEX(jump_label_mutex);
void jump_label_lock(void)
diff --git a/kernel/kcsan/encoding.h b/kernel/kcsan/encoding.h
index f03562aaf2eb..1a6db2f797ac 100644
--- a/kernel/kcsan/encoding.h
+++ b/kernel/kcsan/encoding.h
@@ -32,7 +32,7 @@
* 1. different addresses but with the same encoded address race;
* 2. and both map onto the same watchpoint slots;
*
- * Both these are assumed to be very unlikely. However, in case it still happens
+ * Both these are assumed to be very unlikely. However, in case it still
* happens, the report logic will filter out the false positive (see report.c).
*/
#define WATCHPOINT_ADDR_BITS (BITS_PER_LONG-1 - WATCHPOINT_SIZE_BITS)
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index c5e5e5a11535..8798a8183974 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -109,7 +109,7 @@ EXPORT_SYMBOL_GPL(kexec_crash_loaded);
* defined more restrictively in <asm/kexec.h>.
*
* The code for the transition from the current kernel to the
- * the new kernel is placed in the control_code_buffer, whose size
+ * new kernel is placed in the control_code_buffer, whose size
* is given by KEXEC_CONTROL_PAGE_SIZE. In the best case only a single
* page of memory is necessary, but some architectures require more.
* Because this memory must be identity mapped in the transition from
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 84f7316792a7..e21f6b9234f7 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -521,7 +521,7 @@ static int locate_mem_hole_callback(struct resource *res, void *arg)
/* Returning 0 will take to next memory range */
/* Don't use memory that will be detected and handled by a driver. */
- if (res->flags & IORESOURCE_MEM_DRIVER_MANAGED)
+ if (res->flags & IORESOURCE_SYSRAM_DRIVER_MANAGED)
return 0;
if (sz < kbuf->memsz)
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 3edaa380dc7b..e29773c82b70 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -775,7 +775,7 @@ EXPORT_SYMBOL(kthread_create_worker);
/**
* kthread_create_worker_on_cpu - create a kthread worker and bind it
- * it to a given CPU and the associated NUMA node.
+ * to a given CPU and the associated NUMA node.
* @cpu: CPU number
* @flags: flags modifying the default behavior of the worker
* @namefmt: printf-style name for the kthread worker (task).
diff --git a/kernel/livepatch/state.c b/kernel/livepatch/state.c
index 7ee19476de9d..2565d039ade0 100644
--- a/kernel/livepatch/state.c
+++ b/kernel/livepatch/state.c
@@ -55,7 +55,7 @@ EXPORT_SYMBOL_GPL(klp_get_state);
*
* The function can be called only during transition when a new
* livepatch is being enabled or when such a transition is reverted.
- * It is typically called only from from pre/post (un)patch
+ * It is typically called only from pre/post (un)patch
* callbacks.
*
* Return: pointer to the latest struct klp_state from already
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 9cfa5e89cff7..62d215b2e39f 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -566,7 +566,7 @@ static struct lock_torture_ops rwsem_lock_ops = {
#include <linux/percpu-rwsem.h>
static struct percpu_rw_semaphore pcpu_rwsem;
-void torture_percpu_rwsem_init(void)
+static void torture_percpu_rwsem_init(void)
{
BUG_ON(percpu_init_rwsem(&pcpu_rwsem));
}
diff --git a/kernel/panic.c b/kernel/panic.c
index aef8872ba843..396142ee43fd 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -589,6 +589,11 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
if (args)
vprintk(args->fmt, args->args);
+ print_modules();
+
+ if (regs)
+ show_regs(regs);
+
if (panic_on_warn) {
/*
* This thread may hit another WARN() in the panic path.
@@ -600,12 +605,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
panic("panic_on_warn set ...\n");
}
- print_modules();
-
- if (regs)
- show_regs(regs);
- else
- dump_stack();
+ dump_stack();
print_irqtrace_events(current);
diff --git a/kernel/pid.c b/kernel/pid.c
index 74ddbff1a6ba..a96bc4bf4f86 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -520,6 +520,25 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
return idr_get_next(&ns->idr, &nr);
}
+struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags)
+{
+ struct fd f;
+ struct pid *pid;
+
+ f = fdget(fd);
+ if (!f.file)
+ return ERR_PTR(-EBADF);
+
+ pid = pidfd_pid(f.file);
+ if (!IS_ERR(pid)) {
+ get_pid(pid);
+ *flags = f.file->f_flags;
+ }
+
+ fdput(f);
+ return pid;
+}
+
/**
* pidfd_create() - Create a new pid file descriptor.
*
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index ac135bd600eb..9de21803a8ae 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -233,7 +233,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
* to pid_ns->child_reaper. Thus pidns->child_reaper needs to
* stay valid until they all go away.
*
- * The code relies on the the pid_ns->child_reaper ignoring
+ * The code relies on the pid_ns->child_reaper ignoring
* SIGCHILD to cause those EXIT_ZOMBIE processes to be
* autoreaped if reparented.
*
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index d25749bce7cf..46b1804c1ddf 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -735,7 +735,7 @@ zone_found:
*/
/*
- * If the zone we wish to scan is the the current zone and the
+ * If the zone we wish to scan is the current zone and the
* pfn falls into the current node then we do not need to walk
* the tree.
*/
diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
index 2493348a1631..24a960a89aa8 100644
--- a/kernel/printk/printk_ringbuffer.c
+++ b/kernel/printk/printk_ringbuffer.c
@@ -1125,7 +1125,10 @@ static char *data_realloc(struct printk_ringbuffer *rb,
/* If the data block does not increase, there is nothing to do. */
if (head_lpos - next_lpos < DATA_SIZE(data_ring)) {
- blk = to_block(data_ring, blk_lpos->begin);
+ if (wrapped)
+ blk = to_block(data_ring, 0);
+ else
+ blk = to_block(data_ring, blk_lpos->begin);
return &blk->data[0];
}
diff --git a/kernel/range.c b/kernel/range.c
index d84de6766472..56435f96da73 100644
--- a/kernel/range.c
+++ b/kernel/range.c
@@ -2,8 +2,9 @@
/*
* Range add and subtract
*/
-#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/minmax.h>
+#include <linux/printk.h>
#include <linux/sort.h>
#include <linux/string.h>
#include <linux/range.h>
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index 0ebe15a84985..b71e21f73c40 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -135,10 +135,12 @@ config RCU_FANOUT
config RCU_FANOUT_LEAF
int "Tree-based hierarchical RCU leaf-level fanout value"
- range 2 64 if 64BIT
- range 2 32 if !64BIT
+ range 2 64 if 64BIT && !RCU_STRICT_GRACE_PERIOD
+ range 2 32 if !64BIT && !RCU_STRICT_GRACE_PERIOD
+ range 2 3 if RCU_STRICT_GRACE_PERIOD
depends on TREE_RCU && RCU_EXPERT
- default 16
+ default 16 if !RCU_STRICT_GRACE_PERIOD
+ default 2 if RCU_STRICT_GRACE_PERIOD
help
This option controls the leaf-level fanout of hierarchical
implementations of RCU, and allows trading off cache misses
diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index 3cf6132a4bb9..1942c1f1bb65 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -23,7 +23,7 @@ config TORTURE_TEST
tristate
default n
-config RCU_PERF_TEST
+config RCU_SCALE_TEST
tristate "performance tests for RCU"
depends on DEBUG_KERNEL
select TORTURE_TEST
@@ -114,4 +114,19 @@ config RCU_EQS_DEBUG
Say N here if you need ultimate kernel/user switch latencies
Say Y if you are unsure
+config RCU_STRICT_GRACE_PERIOD
+ bool "Provide debug RCU implementation with short grace periods"
+ depends on DEBUG_KERNEL && RCU_EXPERT
+ default n
+ select PREEMPT_COUNT if PREEMPT=n
+ help
+ Select this option to build an RCU variant that is strict about
+ grace periods, making them as short as it can. This limits
+ scalability, destroys real-time response, degrades battery
+ lifetime and kills performance. Don't try this on large
+ machines, as in systems with more than about 10 or 20 CPUs.
+ But in conjunction with tools like KASAN, it can be helpful
+ when looking for certain types of RCU usage bugs, for example,
+ too-short RCU read-side critical sections.
+
endmenu # "RCU Debugging"
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
index 95f5117ef8da..0cfb009a99b9 100644
--- a/kernel/rcu/Makefile
+++ b/kernel/rcu/Makefile
@@ -11,7 +11,7 @@ obj-y += update.o sync.o
obj-$(CONFIG_TREE_SRCU) += srcutree.o
obj-$(CONFIG_TINY_SRCU) += srcutiny.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
-obj-$(CONFIG_RCU_PERF_TEST) += rcuperf.o
+obj-$(CONFIG_RCU_SCALE_TEST) += rcuscale.o
obj-$(CONFIG_RCU_REF_SCALE_TEST) += refscale.o
obj-$(CONFIG_TREE_RCU) += tree.o
obj-$(CONFIG_TINY_RCU) += tiny.o
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index 9a0f66133b4b..2d2a6b6b9dfb 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -475,8 +475,16 @@ bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq)
* Also advance to the oldest segment of callbacks whose
* ->gp_seq[] completion is at or after that passed in via "seq",
* skipping any empty segments.
+ *
+ * Note that segment "i" (and any lower-numbered segments
+ * containing older callbacks) will be unaffected, and their
+ * grace-period numbers remain unchanged. For example, if i ==
+ * WAIT_TAIL, then neither WAIT_TAIL nor DONE_TAIL will be touched.
+ * Instead, the CBs in NEXT_TAIL will be merged with those in
+ * NEXT_READY_TAIL and the grace-period number of NEXT_READY_TAIL
+ * would be updated. NEXT_TAIL would then be empty.
*/
- if (++i >= RCU_NEXT_TAIL)
+ if (rcu_segcblist_restempty(rsclp, i) || ++i >= RCU_NEXT_TAIL)
return false;
/*
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuscale.c
index 21448d3374e2..2819b95479af 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuscale.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+
/*
- * Read-Copy Update module-based performance-test facility
+ * Read-Copy Update module-based scalability-test facility
*
* Copyright (C) IBM Corporation, 2015
*
@@ -44,13 +44,13 @@
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>");
-#define PERF_FLAG "-perf:"
-#define PERFOUT_STRING(s) \
- pr_alert("%s" PERF_FLAG " %s\n", perf_type, s)
-#define VERBOSE_PERFOUT_STRING(s) \
- do { if (verbose) pr_alert("%s" PERF_FLAG " %s\n", perf_type, s); } while (0)
-#define VERBOSE_PERFOUT_ERRSTRING(s) \
- do { if (verbose) pr_alert("%s" PERF_FLAG "!!! %s\n", perf_type, s); } while (0)
+#define SCALE_FLAG "-scale:"
+#define SCALEOUT_STRING(s) \
+ pr_alert("%s" SCALE_FLAG " %s\n", scale_type, s)
+#define VERBOSE_SCALEOUT_STRING(s) \
+ do { if (verbose) pr_alert("%s" SCALE_FLAG " %s\n", scale_type, s); } while (0)
+#define VERBOSE_SCALEOUT_ERRSTRING(s) \
+ do { if (verbose) pr_alert("%s" SCALE_FLAG "!!! %s\n", scale_type, s); } while (0)
/*
* The intended use cases for the nreaders and nwriters module parameters
@@ -61,25 +61,25 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>");
* nr_cpus for a mixed reader/writer test.
*
* 2. Specify the nr_cpus kernel boot parameter, but set
- * rcuperf.nreaders to zero. This will set nwriters to the
+ * rcuscale.nreaders to zero. This will set nwriters to the
* value specified by nr_cpus for an update-only test.
*
* 3. Specify the nr_cpus kernel boot parameter, but set
- * rcuperf.nwriters to zero. This will set nreaders to the
+ * rcuscale.nwriters to zero. This will set nreaders to the
* value specified by nr_cpus for a read-only test.
*
* Various other use cases may of course be specified.
*
* Note that this test's readers are intended only as a test load for
- * the writers. The reader performance statistics will be overly
+ * the writers. The reader scalability statistics will be overly
* pessimistic due to the per-critical-section interrupt disabling,
* test-end checks, and the pair of calls through pointers.
*/
#ifdef MODULE
-# define RCUPERF_SHUTDOWN 0
+# define RCUSCALE_SHUTDOWN 0
#else
-# define RCUPERF_SHUTDOWN 1
+# define RCUSCALE_SHUTDOWN 1
#endif
torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives");
@@ -88,16 +88,16 @@ torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
torture_param(int, holdoff, 10, "Holdoff time before test start (s)");
torture_param(int, nreaders, -1, "Number of RCU reader threads");
torture_param(int, nwriters, -1, "Number of RCU updater threads");
-torture_param(bool, shutdown, RCUPERF_SHUTDOWN,
- "Shutdown at end of performance tests.");
+torture_param(bool, shutdown, RCUSCALE_SHUTDOWN,
+ "Shutdown at end of scalability tests.");
torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
-torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() perf test?");
+torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() scale test?");
torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate.");
-static char *perf_type = "rcu";
-module_param(perf_type, charp, 0444);
-MODULE_PARM_DESC(perf_type, "Type of RCU to performance-test (rcu, srcu, ...)");
+static char *scale_type = "rcu";
+module_param(scale_type, charp, 0444);
+MODULE_PARM_DESC(scale_type, "Type of RCU to scalability-test (rcu, srcu, ...)");
static int nrealreaders;
static int nrealwriters;
@@ -107,12 +107,12 @@ static struct task_struct *shutdown_task;
static u64 **writer_durations;
static int *writer_n_durations;
-static atomic_t n_rcu_perf_reader_started;
-static atomic_t n_rcu_perf_writer_started;
-static atomic_t n_rcu_perf_writer_finished;
+static atomic_t n_rcu_scale_reader_started;
+static atomic_t n_rcu_scale_writer_started;
+static atomic_t n_rcu_scale_writer_finished;
static wait_queue_head_t shutdown_wq;
-static u64 t_rcu_perf_writer_started;
-static u64 t_rcu_perf_writer_finished;
+static u64 t_rcu_scale_writer_started;
+static u64 t_rcu_scale_writer_finished;
static unsigned long b_rcu_gp_test_started;
static unsigned long b_rcu_gp_test_finished;
static DEFINE_PER_CPU(atomic_t, n_async_inflight);
@@ -124,7 +124,7 @@ static DEFINE_PER_CPU(atomic_t, n_async_inflight);
* Operations vector for selecting different types of tests.
*/
-struct rcu_perf_ops {
+struct rcu_scale_ops {
int ptype;
void (*init)(void);
void (*cleanup)(void);
@@ -140,19 +140,19 @@ struct rcu_perf_ops {
const char *name;
};
-static struct rcu_perf_ops *cur_ops;
+static struct rcu_scale_ops *cur_ops;
/*
- * Definitions for rcu perf testing.
+ * Definitions for rcu scalability testing.
*/
-static int rcu_perf_read_lock(void) __acquires(RCU)
+static int rcu_scale_read_lock(void) __acquires(RCU)
{
rcu_read_lock();
return 0;
}
-static void rcu_perf_read_unlock(int idx) __releases(RCU)
+static void rcu_scale_read_unlock(int idx) __releases(RCU)
{
rcu_read_unlock();
}
@@ -162,15 +162,15 @@ static unsigned long __maybe_unused rcu_no_completed(void)
return 0;
}
-static void rcu_sync_perf_init(void)
+static void rcu_sync_scale_init(void)
{
}
-static struct rcu_perf_ops rcu_ops = {
+static struct rcu_scale_ops rcu_ops = {
.ptype = RCU_FLAVOR,
- .init = rcu_sync_perf_init,
- .readlock = rcu_perf_read_lock,
- .readunlock = rcu_perf_read_unlock,
+ .init = rcu_sync_scale_init,
+ .readlock = rcu_scale_read_lock,
+ .readunlock = rcu_scale_read_unlock,
.get_gp_seq = rcu_get_gp_seq,
.gp_diff = rcu_seq_diff,
.exp_completed = rcu_exp_batches_completed,
@@ -182,23 +182,23 @@ static struct rcu_perf_ops rcu_ops = {
};
/*
- * Definitions for srcu perf testing.
+ * Definitions for srcu scalability testing.
*/
-DEFINE_STATIC_SRCU(srcu_ctl_perf);
-static struct srcu_struct *srcu_ctlp = &srcu_ctl_perf;
+DEFINE_STATIC_SRCU(srcu_ctl_scale);
+static struct srcu_struct *srcu_ctlp = &srcu_ctl_scale;
-static int srcu_perf_read_lock(void) __acquires(srcu_ctlp)
+static int srcu_scale_read_lock(void) __acquires(srcu_ctlp)
{
return srcu_read_lock(srcu_ctlp);
}
-static void srcu_perf_read_unlock(int idx) __releases(srcu_ctlp)
+static void srcu_scale_read_unlock(int idx) __releases(srcu_ctlp)
{
srcu_read_unlock(srcu_ctlp, idx);
}
-static unsigned long srcu_perf_completed(void)
+static unsigned long srcu_scale_completed(void)
{
return srcu_batches_completed(srcu_ctlp);
}
@@ -213,78 +213,78 @@ static void srcu_rcu_barrier(void)
srcu_barrier(srcu_ctlp);
}
-static void srcu_perf_synchronize(void)
+static void srcu_scale_synchronize(void)
{
synchronize_srcu(srcu_ctlp);
}
-static void srcu_perf_synchronize_expedited(void)
+static void srcu_scale_synchronize_expedited(void)
{
synchronize_srcu_expedited(srcu_ctlp);
}
-static struct rcu_perf_ops srcu_ops = {
+static struct rcu_scale_ops srcu_ops = {
.ptype = SRCU_FLAVOR,
- .init = rcu_sync_perf_init,
- .readlock = srcu_perf_read_lock,
- .readunlock = srcu_perf_read_unlock,
- .get_gp_seq = srcu_perf_completed,
+ .init = rcu_sync_scale_init,
+ .readlock = srcu_scale_read_lock,
+ .readunlock = srcu_scale_read_unlock,
+ .get_gp_seq = srcu_scale_completed,
.gp_diff = rcu_seq_diff,
- .exp_completed = srcu_perf_completed,
+ .exp_completed = srcu_scale_completed,
.async = srcu_call_rcu,
.gp_barrier = srcu_rcu_barrier,
- .sync = srcu_perf_synchronize,
- .exp_sync = srcu_perf_synchronize_expedited,
+ .sync = srcu_scale_synchronize,
+ .exp_sync = srcu_scale_synchronize_expedited,
.name = "srcu"
};
static struct srcu_struct srcud;
-static void srcu_sync_perf_init(void)
+static void srcu_sync_scale_init(void)
{
srcu_ctlp = &srcud;
init_srcu_struct(srcu_ctlp);
}
-static void srcu_sync_perf_cleanup(void)
+static void srcu_sync_scale_cleanup(void)
{
cleanup_srcu_struct(srcu_ctlp);
}
-static struct rcu_perf_ops srcud_ops = {
+static struct rcu_scale_ops srcud_ops = {
.ptype = SRCU_FLAVOR,
- .init = srcu_sync_perf_init,
- .cleanup = srcu_sync_perf_cleanup,
- .readlock = srcu_perf_read_lock,
- .readunlock = srcu_perf_read_unlock,
- .get_gp_seq = srcu_perf_completed,
+ .init = srcu_sync_scale_init,
+ .cleanup = srcu_sync_scale_cleanup,
+ .readlock = srcu_scale_read_lock,
+ .readunlock = srcu_scale_read_unlock,
+ .get_gp_seq = srcu_scale_completed,
.gp_diff = rcu_seq_diff,
- .exp_completed = srcu_perf_completed,
+ .exp_completed = srcu_scale_completed,
.async = srcu_call_rcu,
.gp_barrier = srcu_rcu_barrier,
- .sync = srcu_perf_synchronize,
- .exp_sync = srcu_perf_synchronize_expedited,
+ .sync = srcu_scale_synchronize,
+ .exp_sync = srcu_scale_synchronize_expedited,
.name = "srcud"
};
/*
- * Definitions for RCU-tasks perf testing.
+ * Definitions for RCU-tasks scalability testing.
*/
-static int tasks_perf_read_lock(void)
+static int tasks_scale_read_lock(void)
{
return 0;
}
-static void tasks_perf_read_unlock(int idx)
+static void tasks_scale_read_unlock(int idx)
{
}
-static struct rcu_perf_ops tasks_ops = {
+static struct rcu_scale_ops tasks_ops = {
.ptype = RCU_TASKS_FLAVOR,
- .init = rcu_sync_perf_init,
- .readlock = tasks_perf_read_lock,
- .readunlock = tasks_perf_read_unlock,
+ .init = rcu_sync_scale_init,
+ .readlock = tasks_scale_read_lock,
+ .readunlock = tasks_scale_read_unlock,
.get_gp_seq = rcu_no_completed,
.gp_diff = rcu_seq_diff,
.async = call_rcu_tasks,
@@ -294,7 +294,7 @@ static struct rcu_perf_ops tasks_ops = {
.name = "tasks"
};
-static unsigned long rcuperf_seq_diff(unsigned long new, unsigned long old)
+static unsigned long rcuscale_seq_diff(unsigned long new, unsigned long old)
{
if (!cur_ops->gp_diff)
return new - old;
@@ -302,60 +302,60 @@ static unsigned long rcuperf_seq_diff(unsigned long new, unsigned long old)
}
/*
- * If performance tests complete, wait for shutdown to commence.
+ * If scalability tests complete, wait for shutdown to commence.
*/
-static void rcu_perf_wait_shutdown(void)
+static void rcu_scale_wait_shutdown(void)
{
cond_resched_tasks_rcu_qs();
- if (atomic_read(&n_rcu_perf_writer_finished) < nrealwriters)
+ if (atomic_read(&n_rcu_scale_writer_finished) < nrealwriters)
return;
while (!torture_must_stop())
schedule_timeout_uninterruptible(1);
}
/*
- * RCU perf reader kthread. Repeatedly does empty RCU read-side critical
- * section, minimizing update-side interference. However, the point of
- * this test is not to evaluate reader performance, but instead to serve
- * as a test load for update-side performance testing.
+ * RCU scalability reader kthread. Repeatedly does empty RCU read-side
+ * critical section, minimizing update-side interference. However, the
+ * point of this test is not to evaluate reader scalability, but instead
+ * to serve as a test load for update-side scalability testing.
*/
static int
-rcu_perf_reader(void *arg)
+rcu_scale_reader(void *arg)
{
unsigned long flags;
int idx;
long me = (long)arg;
- VERBOSE_PERFOUT_STRING("rcu_perf_reader task started");
+ VERBOSE_SCALEOUT_STRING("rcu_scale_reader task started");
set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
set_user_nice(current, MAX_NICE);
- atomic_inc(&n_rcu_perf_reader_started);
+ atomic_inc(&n_rcu_scale_reader_started);
do {
local_irq_save(flags);
idx = cur_ops->readlock();
cur_ops->readunlock(idx);
local_irq_restore(flags);
- rcu_perf_wait_shutdown();
+ rcu_scale_wait_shutdown();
} while (!torture_must_stop());
- torture_kthread_stopping("rcu_perf_reader");
+ torture_kthread_stopping("rcu_scale_reader");
return 0;
}
/*
- * Callback function for asynchronous grace periods from rcu_perf_writer().
+ * Callback function for asynchronous grace periods from rcu_scale_writer().
*/
-static void rcu_perf_async_cb(struct rcu_head *rhp)
+static void rcu_scale_async_cb(struct rcu_head *rhp)
{
atomic_dec(this_cpu_ptr(&n_async_inflight));
kfree(rhp);
}
/*
- * RCU perf writer kthread. Repeatedly does a grace period.
+ * RCU scale writer kthread. Repeatedly does a grace period.
*/
static int
-rcu_perf_writer(void *arg)
+rcu_scale_writer(void *arg)
{
int i = 0;
int i_max;
@@ -366,7 +366,7 @@ rcu_perf_writer(void *arg)
u64 *wdp;
u64 *wdpp = writer_durations[me];
- VERBOSE_PERFOUT_STRING("rcu_perf_writer task started");
+ VERBOSE_SCALEOUT_STRING("rcu_scale_writer task started");
WARN_ON(!wdpp);
set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
sched_set_fifo_low(current);
@@ -383,8 +383,8 @@ rcu_perf_writer(void *arg)
schedule_timeout_uninterruptible(1);
t = ktime_get_mono_fast_ns();
- if (atomic_inc_return(&n_rcu_perf_writer_started) >= nrealwriters) {
- t_rcu_perf_writer_started = t;
+ if (atomic_inc_return(&n_rcu_scale_writer_started) >= nrealwriters) {
+ t_rcu_scale_writer_started = t;
if (gp_exp) {
b_rcu_gp_test_started =
cur_ops->exp_completed() / 2;
@@ -404,7 +404,7 @@ retry:
rhp = kmalloc(sizeof(*rhp), GFP_KERNEL);
if (rhp && atomic_read(this_cpu_ptr(&n_async_inflight)) < gp_async_max) {
atomic_inc(this_cpu_ptr(&n_async_inflight));
- cur_ops->async(rhp, rcu_perf_async_cb);
+ cur_ops->async(rhp, rcu_scale_async_cb);
rhp = NULL;
} else if (!kthread_should_stop()) {
cur_ops->gp_barrier();
@@ -421,19 +421,19 @@ retry:
*wdp = t - *wdp;
i_max = i;
if (!started &&
- atomic_read(&n_rcu_perf_writer_started) >= nrealwriters)
+ atomic_read(&n_rcu_scale_writer_started) >= nrealwriters)
started = true;
if (!done && i >= MIN_MEAS) {
done = true;
sched_set_normal(current, 0);
- pr_alert("%s%s rcu_perf_writer %ld has %d measurements\n",
- perf_type, PERF_FLAG, me, MIN_MEAS);
- if (atomic_inc_return(&n_rcu_perf_writer_finished) >=
+ pr_alert("%s%s rcu_scale_writer %ld has %d measurements\n",
+ scale_type, SCALE_FLAG, me, MIN_MEAS);
+ if (atomic_inc_return(&n_rcu_scale_writer_finished) >=
nrealwriters) {
schedule_timeout_interruptible(10);
rcu_ftrace_dump(DUMP_ALL);
- PERFOUT_STRING("Test complete");
- t_rcu_perf_writer_finished = t;
+ SCALEOUT_STRING("Test complete");
+ t_rcu_scale_writer_finished = t;
if (gp_exp) {
b_rcu_gp_test_finished =
cur_ops->exp_completed() / 2;
@@ -448,30 +448,30 @@ retry:
}
}
if (done && !alldone &&
- atomic_read(&n_rcu_perf_writer_finished) >= nrealwriters)
+ atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters)
alldone = true;
if (started && !alldone && i < MAX_MEAS - 1)
i++;
- rcu_perf_wait_shutdown();
+ rcu_scale_wait_shutdown();
} while (!torture_must_stop());
if (gp_async) {
cur_ops->gp_barrier();
}
writer_n_durations[me] = i_max;
- torture_kthread_stopping("rcu_perf_writer");
+ torture_kthread_stopping("rcu_scale_writer");
return 0;
}
static void
-rcu_perf_print_module_parms(struct rcu_perf_ops *cur_ops, const char *tag)
+rcu_scale_print_module_parms(struct rcu_scale_ops *cur_ops, const char *tag)
{
- pr_alert("%s" PERF_FLAG
+ pr_alert("%s" SCALE_FLAG
"--- %s: nreaders=%d nwriters=%d verbose=%d shutdown=%d\n",
- perf_type, tag, nrealreaders, nrealwriters, verbose, shutdown);
+ scale_type, tag, nrealreaders, nrealwriters, verbose, shutdown);
}
static void
-rcu_perf_cleanup(void)
+rcu_scale_cleanup(void)
{
int i;
int j;
@@ -484,11 +484,11 @@ rcu_perf_cleanup(void)
* during the mid-boot phase, so have to wait till the end.
*/
if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp)
- VERBOSE_PERFOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!");
+ VERBOSE_SCALEOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!");
if (rcu_gp_is_normal() && gp_exp)
- VERBOSE_PERFOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!");
+ VERBOSE_SCALEOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!");
if (gp_exp && gp_async)
- VERBOSE_PERFOUT_ERRSTRING("No expedited async GPs, so went with async!");
+ VERBOSE_SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!");
if (torture_cleanup_begin())
return;
@@ -499,30 +499,30 @@ rcu_perf_cleanup(void)
if (reader_tasks) {
for (i = 0; i < nrealreaders; i++)
- torture_stop_kthread(rcu_perf_reader,
+ torture_stop_kthread(rcu_scale_reader,
reader_tasks[i]);
kfree(reader_tasks);
}
if (writer_tasks) {
for (i = 0; i < nrealwriters; i++) {
- torture_stop_kthread(rcu_perf_writer,
+ torture_stop_kthread(rcu_scale_writer,
writer_tasks[i]);
if (!writer_n_durations)
continue;
j = writer_n_durations[i];
pr_alert("%s%s writer %d gps: %d\n",
- perf_type, PERF_FLAG, i, j);
+ scale_type, SCALE_FLAG, i, j);
ngps += j;
}
pr_alert("%s%s start: %llu end: %llu duration: %llu gps: %d batches: %ld\n",
- perf_type, PERF_FLAG,
- t_rcu_perf_writer_started, t_rcu_perf_writer_finished,
- t_rcu_perf_writer_finished -
- t_rcu_perf_writer_started,
+ scale_type, SCALE_FLAG,
+ t_rcu_scale_writer_started, t_rcu_scale_writer_finished,
+ t_rcu_scale_writer_finished -
+ t_rcu_scale_writer_started,
ngps,
- rcuperf_seq_diff(b_rcu_gp_test_finished,
- b_rcu_gp_test_started));
+ rcuscale_seq_diff(b_rcu_gp_test_finished,
+ b_rcu_gp_test_started));
for (i = 0; i < nrealwriters; i++) {
if (!writer_durations)
break;
@@ -534,7 +534,7 @@ rcu_perf_cleanup(void)
for (j = 0; j <= writer_n_durations[i]; j++) {
wdp = &wdpp[j];
pr_alert("%s%s %4d writer-duration: %5d %llu\n",
- perf_type, PERF_FLAG,
+ scale_type, SCALE_FLAG,
i, j, *wdp);
if (j % 100 == 0)
schedule_timeout_uninterruptible(1);
@@ -573,22 +573,22 @@ static int compute_real(int n)
}
/*
- * RCU perf shutdown kthread. Just waits to be awakened, then shuts
+ * RCU scalability shutdown kthread. Just waits to be awakened, then shuts
* down system.
*/
static int
-rcu_perf_shutdown(void *arg)
+rcu_scale_shutdown(void *arg)
{
wait_event(shutdown_wq,
- atomic_read(&n_rcu_perf_writer_finished) >= nrealwriters);
+ atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters);
smp_mb(); /* Wake before output. */
- rcu_perf_cleanup();
+ rcu_scale_cleanup();
kernel_power_off();
return -EINVAL;
}
/*
- * kfree_rcu() performance tests: Start a kfree_rcu() loop on all CPUs for number
+ * kfree_rcu() scalability tests: Start a kfree_rcu() loop on all CPUs for number
* of iterations and measure total time and number of GP for all iterations to complete.
*/
@@ -598,8 +598,8 @@ torture_param(int, kfree_loops, 10, "Number of loops doing kfree_alloc_num alloc
static struct task_struct **kfree_reader_tasks;
static int kfree_nrealthreads;
-static atomic_t n_kfree_perf_thread_started;
-static atomic_t n_kfree_perf_thread_ended;
+static atomic_t n_kfree_scale_thread_started;
+static atomic_t n_kfree_scale_thread_ended;
struct kfree_obj {
char kfree_obj[8];
@@ -607,7 +607,7 @@ struct kfree_obj {
};
static int
-kfree_perf_thread(void *arg)
+kfree_scale_thread(void *arg)
{
int i, loop = 0;
long me = (long)arg;
@@ -615,13 +615,13 @@ kfree_perf_thread(void *arg)
u64 start_time, end_time;
long long mem_begin, mem_during = 0;
- VERBOSE_PERFOUT_STRING("kfree_perf_thread task started");
+ VERBOSE_SCALEOUT_STRING("kfree_scale_thread task started");
set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
set_user_nice(current, MAX_NICE);
start_time = ktime_get_mono_fast_ns();
- if (atomic_inc_return(&n_kfree_perf_thread_started) >= kfree_nrealthreads) {
+ if (atomic_inc_return(&n_kfree_scale_thread_started) >= kfree_nrealthreads) {
if (gp_exp)
b_rcu_gp_test_started = cur_ops->exp_completed() / 2;
else
@@ -646,7 +646,7 @@ kfree_perf_thread(void *arg)
cond_resched();
} while (!torture_must_stop() && ++loop < kfree_loops);
- if (atomic_inc_return(&n_kfree_perf_thread_ended) >= kfree_nrealthreads) {
+ if (atomic_inc_return(&n_kfree_scale_thread_ended) >= kfree_nrealthreads) {
end_time = ktime_get_mono_fast_ns();
if (gp_exp)
@@ -656,7 +656,7 @@ kfree_perf_thread(void *arg)
pr_alert("Total time taken by all kfree'ers: %llu ns, loops: %d, batches: %ld, memory footprint: %lldMB\n",
(unsigned long long)(end_time - start_time), kfree_loops,
- rcuperf_seq_diff(b_rcu_gp_test_finished, b_rcu_gp_test_started),
+ rcuscale_seq_diff(b_rcu_gp_test_finished, b_rcu_gp_test_started),
(mem_begin - mem_during) >> (20 - PAGE_SHIFT));
if (shutdown) {
@@ -665,12 +665,12 @@ kfree_perf_thread(void *arg)
}
}
- torture_kthread_stopping("kfree_perf_thread");
+ torture_kthread_stopping("kfree_scale_thread");
return 0;
}
static void
-kfree_perf_cleanup(void)
+kfree_scale_cleanup(void)
{
int i;
@@ -679,7 +679,7 @@ kfree_perf_cleanup(void)
if (kfree_reader_tasks) {
for (i = 0; i < kfree_nrealthreads; i++)
- torture_stop_kthread(kfree_perf_thread,
+ torture_stop_kthread(kfree_scale_thread,
kfree_reader_tasks[i]);
kfree(kfree_reader_tasks);
}
@@ -691,20 +691,20 @@ kfree_perf_cleanup(void)
* shutdown kthread. Just waits to be awakened, then shuts down system.
*/
static int
-kfree_perf_shutdown(void *arg)
+kfree_scale_shutdown(void *arg)
{
wait_event(shutdown_wq,
- atomic_read(&n_kfree_perf_thread_ended) >= kfree_nrealthreads);
+ atomic_read(&n_kfree_scale_thread_ended) >= kfree_nrealthreads);
smp_mb(); /* Wake before output. */
- kfree_perf_cleanup();
+ kfree_scale_cleanup();
kernel_power_off();
return -EINVAL;
}
static int __init
-kfree_perf_init(void)
+kfree_scale_init(void)
{
long i;
int firsterr = 0;
@@ -713,7 +713,7 @@ kfree_perf_init(void)
/* Start up the kthreads. */
if (shutdown) {
init_waitqueue_head(&shutdown_wq);
- firsterr = torture_create_kthread(kfree_perf_shutdown, NULL,
+ firsterr = torture_create_kthread(kfree_scale_shutdown, NULL,
shutdown_task);
if (firsterr)
goto unwind;
@@ -730,13 +730,13 @@ kfree_perf_init(void)
}
for (i = 0; i < kfree_nrealthreads; i++) {
- firsterr = torture_create_kthread(kfree_perf_thread, (void *)i,
+ firsterr = torture_create_kthread(kfree_scale_thread, (void *)i,
kfree_reader_tasks[i]);
if (firsterr)
goto unwind;
}
- while (atomic_read(&n_kfree_perf_thread_started) < kfree_nrealthreads)
+ while (atomic_read(&n_kfree_scale_thread_started) < kfree_nrealthreads)
schedule_timeout_uninterruptible(1);
torture_init_end();
@@ -744,35 +744,35 @@ kfree_perf_init(void)
unwind:
torture_init_end();
- kfree_perf_cleanup();
+ kfree_scale_cleanup();
return firsterr;
}
static int __init
-rcu_perf_init(void)
+rcu_scale_init(void)
{
long i;
int firsterr = 0;
- static struct rcu_perf_ops *perf_ops[] = {
+ static struct rcu_scale_ops *scale_ops[] = {
&rcu_ops, &srcu_ops, &srcud_ops, &tasks_ops,
};
- if (!torture_init_begin(perf_type, verbose))
+ if (!torture_init_begin(scale_type, verbose))
return -EBUSY;
- /* Process args and tell the world that the perf'er is on the job. */
- for (i = 0; i < ARRAY_SIZE(perf_ops); i++) {
- cur_ops = perf_ops[i];
- if (strcmp(perf_type, cur_ops->name) == 0)
+ /* Process args and announce that the scalability'er is on the job. */
+ for (i = 0; i < ARRAY_SIZE(scale_ops); i++) {
+ cur_ops = scale_ops[i];
+ if (strcmp(scale_type, cur_ops->name) == 0)
break;
}
- if (i == ARRAY_SIZE(perf_ops)) {
- pr_alert("rcu-perf: invalid perf type: \"%s\"\n", perf_type);
- pr_alert("rcu-perf types:");
- for (i = 0; i < ARRAY_SIZE(perf_ops); i++)
- pr_cont(" %s", perf_ops[i]->name);
+ if (i == ARRAY_SIZE(scale_ops)) {
+ pr_alert("rcu-scale: invalid scale type: \"%s\"\n", scale_type);
+ pr_alert("rcu-scale types:");
+ for (i = 0; i < ARRAY_SIZE(scale_ops); i++)
+ pr_cont(" %s", scale_ops[i]->name);
pr_cont("\n");
- WARN_ON(!IS_MODULE(CONFIG_RCU_PERF_TEST));
+ WARN_ON(!IS_MODULE(CONFIG_RCU_SCALE_TEST));
firsterr = -EINVAL;
cur_ops = NULL;
goto unwind;
@@ -781,20 +781,20 @@ rcu_perf_init(void)
cur_ops->init();
if (kfree_rcu_test)
- return kfree_perf_init();
+ return kfree_scale_init();
nrealwriters = compute_real(nwriters);
nrealreaders = compute_real(nreaders);
- atomic_set(&n_rcu_perf_reader_started, 0);
- atomic_set(&n_rcu_perf_writer_started, 0);
- atomic_set(&n_rcu_perf_writer_finished, 0);
- rcu_perf_print_module_parms(cur_ops, "Start of test");
+ atomic_set(&n_rcu_scale_reader_started, 0);
+ atomic_set(&n_rcu_scale_writer_started, 0);
+ atomic_set(&n_rcu_scale_writer_finished, 0);
+ rcu_scale_print_module_parms(cur_ops, "Start of test");
/* Start up the kthreads. */
if (shutdown) {
init_waitqueue_head(&shutdown_wq);
- firsterr = torture_create_kthread(rcu_perf_shutdown, NULL,
+ firsterr = torture_create_kthread(rcu_scale_shutdown, NULL,
shutdown_task);
if (firsterr)
goto unwind;
@@ -803,17 +803,17 @@ rcu_perf_init(void)
reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]),
GFP_KERNEL);
if (reader_tasks == NULL) {
- VERBOSE_PERFOUT_ERRSTRING("out of memory");
+ VERBOSE_SCALEOUT_ERRSTRING("out of memory");
firsterr = -ENOMEM;
goto unwind;
}
for (i = 0; i < nrealreaders; i++) {
- firsterr = torture_create_kthread(rcu_perf_reader, (void *)i,
+ firsterr = torture_create_kthread(rcu_scale_reader, (void *)i,
reader_tasks[i]);
if (firsterr)
goto unwind;
}
- while (atomic_read(&n_rcu_perf_reader_started) < nrealreaders)
+ while (atomic_read(&n_rcu_scale_reader_started) < nrealreaders)
schedule_timeout_uninterruptible(1);
writer_tasks = kcalloc(nrealwriters, sizeof(reader_tasks[0]),
GFP_KERNEL);
@@ -823,7 +823,7 @@ rcu_perf_init(void)
kcalloc(nrealwriters, sizeof(*writer_n_durations),
GFP_KERNEL);
if (!writer_tasks || !writer_durations || !writer_n_durations) {
- VERBOSE_PERFOUT_ERRSTRING("out of memory");
+ VERBOSE_SCALEOUT_ERRSTRING("out of memory");
firsterr = -ENOMEM;
goto unwind;
}
@@ -835,7 +835,7 @@ rcu_perf_init(void)
firsterr = -ENOMEM;
goto unwind;
}
- firsterr = torture_create_kthread(rcu_perf_writer, (void *)i,
+ firsterr = torture_create_kthread(rcu_scale_writer, (void *)i,
writer_tasks[i]);
if (firsterr)
goto unwind;
@@ -845,9 +845,9 @@ rcu_perf_init(void)
unwind:
torture_init_end();
- rcu_perf_cleanup();
+ rcu_scale_cleanup();
return firsterr;
}
-module_init(rcu_perf_init);
-module_exit(rcu_perf_cleanup);
+module_init(rcu_scale_init);
+module_exit(rcu_scale_cleanup);
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index f453bf8d2f1e..916ea4f66e4b 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -52,19 +52,6 @@
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com> and Josh Triplett <josh@joshtriplett.org>");
-#ifndef data_race
-#define data_race(expr) \
- ({ \
- expr; \
- })
-#endif
-#ifndef ASSERT_EXCLUSIVE_WRITER
-#define ASSERT_EXCLUSIVE_WRITER(var) do { } while (0)
-#endif
-#ifndef ASSERT_EXCLUSIVE_ACCESS
-#define ASSERT_EXCLUSIVE_ACCESS(var) do { } while (0)
-#endif
-
/* Bits for ->extendables field, extendables param, and related definitions. */
#define RCUTORTURE_RDR_SHIFT 8 /* Put SRCU index in upper bits. */
#define RCUTORTURE_RDR_MASK ((1 << RCUTORTURE_RDR_SHIFT) - 1)
@@ -100,6 +87,7 @@ torture_param(bool, gp_normal, false,
"Use normal (non-expedited) GP wait primitives");
torture_param(bool, gp_sync, false, "Use synchronous GP wait primitives");
torture_param(int, irqreader, 1, "Allow RCU readers from irq handlers");
+torture_param(int, leakpointer, 0, "Leak pointer dereferences from readers");
torture_param(int, n_barrier_cbs, 0,
"# of callbacks/kthreads for barrier testing");
torture_param(int, nfakewriters, 4, "Number of RCU fake writer threads");
@@ -185,6 +173,7 @@ static long n_barrier_successes; /* did rcu_barrier test succeed? */
static unsigned long n_read_exits;
static struct list_head rcu_torture_removed;
static unsigned long shutdown_jiffies;
+static unsigned long start_gp_seq;
static int rcu_torture_writer_state;
#define RTWS_FIXED_DELAY 0
@@ -1413,6 +1402,9 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp)
preempt_enable();
rcutorture_one_extend(&readstate, 0, trsp, rtrsp);
WARN_ON_ONCE(readstate & RCUTORTURE_RDR_MASK);
+ // This next splat is expected behavior if leakpointer, especially
+ // for CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels.
+ WARN_ON_ONCE(leakpointer && READ_ONCE(p->rtort_pipe_count) > 1);
/* If error or close call, record the sequence of reader protections. */
if ((pipe_count > 1 || completed > 1) && !xchg(&err_segs_recorded, 1)) {
@@ -1808,6 +1800,7 @@ struct rcu_fwd {
unsigned long rcu_launder_gp_seq_start;
};
+static DEFINE_MUTEX(rcu_fwd_mutex);
static struct rcu_fwd *rcu_fwds;
static bool rcu_fwd_emergency_stop;
@@ -2074,8 +2067,14 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
static int rcutorture_oom_notify(struct notifier_block *self,
unsigned long notused, void *nfreed)
{
- struct rcu_fwd *rfp = rcu_fwds;
+ struct rcu_fwd *rfp;
+ mutex_lock(&rcu_fwd_mutex);
+ rfp = rcu_fwds;
+ if (!rfp) {
+ mutex_unlock(&rcu_fwd_mutex);
+ return NOTIFY_OK;
+ }
WARN(1, "%s invoked upon OOM during forward-progress testing.\n",
__func__);
rcu_torture_fwd_cb_hist(rfp);
@@ -2093,6 +2092,7 @@ static int rcutorture_oom_notify(struct notifier_block *self,
smp_mb(); /* Frees before return to avoid redoing OOM. */
(*(unsigned long *)nfreed)++; /* Forward progress CBs freed! */
pr_info("%s returning after OOM processing.\n", __func__);
+ mutex_unlock(&rcu_fwd_mutex);
return NOTIFY_OK;
}
@@ -2114,13 +2114,11 @@ static int rcu_torture_fwd_prog(void *args)
do {
schedule_timeout_interruptible(fwd_progress_holdoff * HZ);
WRITE_ONCE(rcu_fwd_emergency_stop, false);
- register_oom_notifier(&rcutorture_oom_nb);
if (!IS_ENABLED(CONFIG_TINY_RCU) ||
rcu_inkernel_boot_has_ended())
rcu_torture_fwd_prog_nr(rfp, &tested, &tested_tries);
if (rcu_inkernel_boot_has_ended())
rcu_torture_fwd_prog_cr(rfp);
- unregister_oom_notifier(&rcutorture_oom_nb);
/* Avoid slow periods, better to test when busy. */
stutter_wait("rcu_torture_fwd_prog");
@@ -2160,9 +2158,26 @@ static int __init rcu_torture_fwd_prog_init(void)
return -ENOMEM;
spin_lock_init(&rfp->rcu_fwd_lock);
rfp->rcu_fwd_cb_tail = &rfp->rcu_fwd_cb_head;
+ mutex_lock(&rcu_fwd_mutex);
+ rcu_fwds = rfp;
+ mutex_unlock(&rcu_fwd_mutex);
+ register_oom_notifier(&rcutorture_oom_nb);
return torture_create_kthread(rcu_torture_fwd_prog, rfp, fwd_prog_task);
}
+static void rcu_torture_fwd_prog_cleanup(void)
+{
+ struct rcu_fwd *rfp;
+
+ torture_stop_kthread(rcu_torture_fwd_prog, fwd_prog_task);
+ rfp = rcu_fwds;
+ mutex_lock(&rcu_fwd_mutex);
+ rcu_fwds = NULL;
+ mutex_unlock(&rcu_fwd_mutex);
+ unregister_oom_notifier(&rcutorture_oom_nb);
+ kfree(rfp);
+}
+
/* Callback function for RCU barrier testing. */
static void rcu_torture_barrier_cbf(struct rcu_head *rcu)
{
@@ -2460,7 +2475,7 @@ rcu_torture_cleanup(void)
show_rcu_gp_kthreads();
rcu_torture_read_exit_cleanup();
rcu_torture_barrier_cleanup();
- torture_stop_kthread(rcu_torture_fwd_prog, fwd_prog_task);
+ rcu_torture_fwd_prog_cleanup();
torture_stop_kthread(rcu_torture_stall, stall_task);
torture_stop_kthread(rcu_torture_writer, writer_task);
@@ -2482,8 +2497,9 @@ rcu_torture_cleanup(void)
rcutorture_get_gp_data(cur_ops->ttype, &flags, &gp_seq);
srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp, &flags, &gp_seq);
- pr_alert("%s: End-test grace-period state: g%lu f%#x\n",
- cur_ops->name, gp_seq, flags);
+ pr_alert("%s: End-test grace-period state: g%ld f%#x total-gps=%ld\n",
+ cur_ops->name, (long)gp_seq, flags,
+ rcutorture_seq_diff(gp_seq, start_gp_seq));
torture_stop_kthread(rcu_torture_stats, stats_task);
torture_stop_kthread(rcu_torture_fqs, fqs_task);
if (rcu_torture_can_boost())
@@ -2607,6 +2623,8 @@ rcu_torture_init(void)
long i;
int cpu;
int firsterr = 0;
+ int flags = 0;
+ unsigned long gp_seq = 0;
static struct rcu_torture_ops *torture_ops[] = {
&rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops,
&busted_srcud_ops, &tasks_ops, &tasks_rude_ops,
@@ -2649,6 +2667,11 @@ rcu_torture_init(void)
nrealreaders = 1;
}
rcu_torture_print_module_parms(cur_ops, "Start of test");
+ rcutorture_get_gp_data(cur_ops->ttype, &flags, &gp_seq);
+ srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp, &flags, &gp_seq);
+ start_gp_seq = gp_seq;
+ pr_alert("%s: Start-test grace-period state: g%ld f%#x\n",
+ cur_ops->name, (long)gp_seq, flags);
/* Set up the freelist. */
diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c
index d9291f883b54..952595c678b3 100644
--- a/kernel/rcu/refscale.c
+++ b/kernel/rcu/refscale.c
@@ -546,9 +546,11 @@ static int main_func(void *arg)
// Print the average of all experiments
SCALEOUT("END OF TEST. Calculating average duration per loop (nanoseconds)...\n");
- buf[0] = 0;
- strcat(buf, "\n");
- strcat(buf, "Runs\tTime(ns)\n");
+ if (!errexit) {
+ buf[0] = 0;
+ strcat(buf, "\n");
+ strcat(buf, "Runs\tTime(ns)\n");
+ }
for (exp = 0; exp < nruns; exp++) {
u64 avg;
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index c100acf332ed..c13348ee80a5 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -29,19 +29,6 @@
#include "rcu.h"
#include "rcu_segcblist.h"
-#ifndef data_race
-#define data_race(expr) \
- ({ \
- expr; \
- })
-#endif
-#ifndef ASSERT_EXCLUSIVE_WRITER
-#define ASSERT_EXCLUSIVE_WRITER(var) do { } while (0)
-#endif
-#ifndef ASSERT_EXCLUSIVE_ACCESS
-#define ASSERT_EXCLUSIVE_ACCESS(var) do { } while (0)
-#endif
-
/* Holdoff in nanoseconds for auto-expediting. */
#define DEFAULT_SRCU_EXP_HOLDOFF (25 * 1000)
static ulong exp_holdoff = DEFAULT_SRCU_EXP_HOLDOFF;
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index f78ee759af9c..06895ef85d69 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -70,19 +70,6 @@
#endif
#define MODULE_PARAM_PREFIX "rcutree."
-#ifndef data_race
-#define data_race(expr) \
- ({ \
- expr; \
- })
-#endif
-#ifndef ASSERT_EXCLUSIVE_WRITER
-#define ASSERT_EXCLUSIVE_WRITER(var) do { } while (0)
-#endif
-#ifndef ASSERT_EXCLUSIVE_ACCESS
-#define ASSERT_EXCLUSIVE_ACCESS(var) do { } while (0)
-#endif
-
/* Data structures. */
/*
@@ -178,6 +165,12 @@ module_param(gp_init_delay, int, 0444);
static int gp_cleanup_delay;
module_param(gp_cleanup_delay, int, 0444);
+// Add delay to rcu_read_unlock() for strict grace periods.
+static int rcu_unlock_delay;
+#ifdef CONFIG_RCU_STRICT_GRACE_PERIOD
+module_param(rcu_unlock_delay, int, 0444);
+#endif
+
/*
* This rcu parameter is runtime-read-only. It reflects
* a minimum allowed number of objects which can be cached
@@ -468,24 +461,25 @@ static int rcu_is_cpu_rrupt_from_idle(void)
return __this_cpu_read(rcu_data.dynticks_nesting) == 0;
}
-#define DEFAULT_RCU_BLIMIT 10 /* Maximum callbacks per rcu_do_batch ... */
-#define DEFAULT_MAX_RCU_BLIMIT 10000 /* ... even during callback flood. */
+#define DEFAULT_RCU_BLIMIT (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 1000 : 10)
+ // Maximum callbacks per rcu_do_batch ...
+#define DEFAULT_MAX_RCU_BLIMIT 10000 // ... even during callback flood.
static long blimit = DEFAULT_RCU_BLIMIT;
-#define DEFAULT_RCU_QHIMARK 10000 /* If this many pending, ignore blimit. */
+#define DEFAULT_RCU_QHIMARK 10000 // If this many pending, ignore blimit.
static long qhimark = DEFAULT_RCU_QHIMARK;
-#define DEFAULT_RCU_QLOMARK 100 /* Once only this many pending, use blimit. */
+#define DEFAULT_RCU_QLOMARK 100 // Once only this many pending, use blimit.
static long qlowmark = DEFAULT_RCU_QLOMARK;
#define DEFAULT_RCU_QOVLD_MULT 2
#define DEFAULT_RCU_QOVLD (DEFAULT_RCU_QOVLD_MULT * DEFAULT_RCU_QHIMARK)
-static long qovld = DEFAULT_RCU_QOVLD; /* If this many pending, hammer QS. */
-static long qovld_calc = -1; /* No pre-initialization lock acquisitions! */
+static long qovld = DEFAULT_RCU_QOVLD; // If this many pending, hammer QS.
+static long qovld_calc = -1; // No pre-initialization lock acquisitions!
module_param(blimit, long, 0444);
module_param(qhimark, long, 0444);
module_param(qlowmark, long, 0444);
module_param(qovld, long, 0444);
-static ulong jiffies_till_first_fqs = ULONG_MAX;
+static ulong jiffies_till_first_fqs = IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 0 : ULONG_MAX;
static ulong jiffies_till_next_fqs = ULONG_MAX;
static bool rcu_kick_kthreads;
static int rcu_divisor = 7;
@@ -1092,11 +1086,6 @@ static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp)
}
}
-noinstr bool __rcu_is_watching(void)
-{
- return !rcu_dynticks_curr_cpu_in_eqs();
-}
-
/**
* rcu_is_watching - see if RCU thinks that the current CPU is not idle
*
@@ -1229,13 +1218,28 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
return 1;
}
- /* If waiting too long on an offline CPU, complain. */
- if (!(rdp->grpmask & rcu_rnp_online_cpus(rnp)) &&
- time_after(jiffies, rcu_state.gp_start + HZ)) {
+ /*
+ * Complain if a CPU that is considered to be offline from RCU's
+ * perspective has not yet reported a quiescent state. After all,
+ * the offline CPU should have reported a quiescent state during
+ * the CPU-offline process, or, failing that, by rcu_gp_init()
+ * if it ran concurrently with either the CPU going offline or the
+ * last task on a leaf rcu_node structure exiting its RCU read-side
+ * critical section while all CPUs corresponding to that structure
+ * are offline. This added warning detects bugs in any of these
+ * code paths.
+ *
+ * The rcu_node structure's ->lock is held here, which excludes
+ * the relevant portions the CPU-hotplug code, the grace-period
+ * initialization code, and the rcu_read_unlock() code paths.
+ *
+ * For more detail, please refer to the "Hotplug CPU" section
+ * of RCU's Requirements documentation.
+ */
+ if (WARN_ON_ONCE(!(rdp->grpmask & rcu_rnp_online_cpus(rnp)))) {
bool onl;
struct rcu_node *rnp1;
- WARN_ON(1); /* Offline CPUs are supposed to report QS! */
pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n",
__func__, rnp->grplo, rnp->grphi, rnp->level,
(long)rnp->gp_seq, (long)rnp->completedqs);
@@ -1498,9 +1502,10 @@ static bool rcu_accelerate_cbs(struct rcu_node *rnp, struct rcu_data *rdp)
/* Trace depending on how much we were able to accelerate. */
if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL))
- trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("AccWaitCB"));
+ trace_rcu_grace_period(rcu_state.name, gp_seq_req, TPS("AccWaitCB"));
else
- trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("AccReadyCB"));
+ trace_rcu_grace_period(rcu_state.name, gp_seq_req, TPS("AccReadyCB"));
+
return ret;
}
@@ -1576,6 +1581,19 @@ static void __maybe_unused rcu_advance_cbs_nowake(struct rcu_node *rnp,
}
/*
+ * In CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels, attempt to generate a
+ * quiescent state. This is intended to be invoked when the CPU notices
+ * a new grace period.
+ */
+static void rcu_strict_gp_check_qs(void)
+{
+ if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
+ rcu_read_lock();
+ rcu_read_unlock();
+ }
+}
+
+/*
* Update CPU-local rcu_data state to record the beginnings and ends of
* grace periods. The caller must hold the ->lock of the leaf rcu_node
* structure corresponding to the current CPU, and must have irqs disabled.
@@ -1645,6 +1663,7 @@ static void note_gp_changes(struct rcu_data *rdp)
}
needwake = __note_gp_changes(rnp, rdp);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ rcu_strict_gp_check_qs();
if (needwake)
rcu_gp_kthread_wake();
}
@@ -1683,6 +1702,15 @@ static void rcu_gp_torture_wait(void)
}
/*
+ * Handler for on_each_cpu() to invoke the target CPU's RCU core
+ * processing.
+ */
+static void rcu_strict_gp_boundary(void *unused)
+{
+ invoke_rcu_core();
+}
+
+/*
* Initialize a new grace period. Return false if no grace period required.
*/
static bool rcu_gp_init(void)
@@ -1720,10 +1748,13 @@ static bool rcu_gp_init(void)
raw_spin_unlock_irq_rcu_node(rnp);
/*
- * Apply per-leaf buffered online and offline operations to the
- * rcu_node tree. Note that this new grace period need not wait
- * for subsequent online CPUs, and that quiescent-state forcing
- * will handle subsequent offline CPUs.
+ * Apply per-leaf buffered online and offline operations to
+ * the rcu_node tree. Note that this new grace period need not
+ * wait for subsequent online CPUs, and that RCU hooks in the CPU
+ * offlining path, when combined with checks in this function,
+ * will handle CPUs that are currently going offline or that will
+ * go offline later. Please also refer to "Hotplug CPU" section
+ * of RCU's Requirements documentation.
*/
rcu_state.gp_state = RCU_GP_ONOFF;
rcu_for_each_leaf_node(rnp) {
@@ -1810,6 +1841,10 @@ static bool rcu_gp_init(void)
WRITE_ONCE(rcu_state.gp_activity, jiffies);
}
+ // If strict, make all CPUs aware of new grace period.
+ if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+ on_each_cpu(rcu_strict_gp_boundary, NULL, 0);
+
return true;
}
@@ -1898,7 +1933,7 @@ static void rcu_gp_fqs_loop(void)
break;
/* If time for quiescent-state forcing, do it. */
if (!time_after(rcu_state.jiffies_force_qs, jiffies) ||
- (gf & RCU_GP_FLAG_FQS)) {
+ (gf & (RCU_GP_FLAG_FQS | RCU_GP_FLAG_OVLD))) {
trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
TPS("fqsstart"));
rcu_gp_fqs(first_gp_fqs);
@@ -2026,6 +2061,10 @@ static void rcu_gp_cleanup(void)
rcu_state.gp_flags & RCU_GP_FLAG_INIT);
}
raw_spin_unlock_irq_rcu_node(rnp);
+
+ // If strict, make all CPUs aware of the end of the old grace period.
+ if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+ on_each_cpu(rcu_strict_gp_boundary, NULL, 0);
}
/*
@@ -2204,7 +2243,7 @@ rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
* structure. This must be called from the specified CPU.
*/
static void
-rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
+rcu_report_qs_rdp(struct rcu_data *rdp)
{
unsigned long flags;
unsigned long mask;
@@ -2213,6 +2252,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
rcu_segcblist_is_offloaded(&rdp->cblist);
struct rcu_node *rnp;
+ WARN_ON_ONCE(rdp->cpu != smp_processor_id());
rnp = rdp->mynode;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rdp->cpu_no_qs.b.norm || rdp->gp_seq != rnp->gp_seq ||
@@ -2229,8 +2269,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
return;
}
mask = rdp->grpmask;
- if (rdp->cpu == smp_processor_id())
- rdp->core_needs_qs = false;
+ rdp->core_needs_qs = false;
if ((rnp->qsmask & mask) == 0) {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
} else {
@@ -2279,7 +2318,7 @@ rcu_check_quiescent_state(struct rcu_data *rdp)
* Tell RCU we are done (but rcu_report_qs_rdp() will be the
* judge of that).
*/
- rcu_report_qs_rdp(rdp->cpu, rdp);
+ rcu_report_qs_rdp(rdp);
}
/*
@@ -2376,6 +2415,7 @@ int rcutree_dead_cpu(unsigned int cpu)
*/
static void rcu_do_batch(struct rcu_data *rdp)
{
+ int div;
unsigned long flags;
const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
rcu_segcblist_is_offloaded(&rdp->cblist);
@@ -2404,9 +2444,15 @@ static void rcu_do_batch(struct rcu_data *rdp)
rcu_nocb_lock(rdp);
WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
pending = rcu_segcblist_n_cbs(&rdp->cblist);
- bl = max(rdp->blimit, pending >> rcu_divisor);
- if (unlikely(bl > 100))
- tlimit = local_clock() + rcu_resched_ns;
+ div = READ_ONCE(rcu_divisor);
+ div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? sizeof(long) * 8 - 2 : div;
+ bl = max(rdp->blimit, pending >> div);
+ if (unlikely(bl > 100)) {
+ long rrn = READ_ONCE(rcu_resched_ns);
+
+ rrn = rrn < NSEC_PER_MSEC ? NSEC_PER_MSEC : rrn > NSEC_PER_SEC ? NSEC_PER_SEC : rrn;
+ tlimit = local_clock() + rrn;
+ }
trace_rcu_batch_start(rcu_state.name,
rcu_segcblist_n_cbs(&rdp->cblist), bl);
rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl);
@@ -2547,8 +2593,7 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp))
raw_spin_lock_irqsave_rcu_node(rnp, flags);
rcu_state.cbovldnext |= !!rnp->cbovldmask;
if (rnp->qsmask == 0) {
- if (!IS_ENABLED(CONFIG_PREEMPT_RCU) ||
- rcu_preempt_blocked_readers_cgp(rnp)) {
+ if (rcu_preempt_blocked_readers_cgp(rnp)) {
/*
* No point in scanning bits because they
* are all zero. But we might need to
@@ -2616,6 +2661,14 @@ void rcu_force_quiescent_state(void)
}
EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
+// Workqueue handler for an RCU reader for kernels enforcing struct RCU
+// grace periods.
+static void strict_work_handler(struct work_struct *work)
+{
+ rcu_read_lock();
+ rcu_read_unlock();
+}
+
/* Perform RCU core processing work for the current CPU. */
static __latent_entropy void rcu_core(void)
{
@@ -2660,6 +2713,10 @@ static __latent_entropy void rcu_core(void)
/* Do any needed deferred wakeups of rcuo kthreads. */
do_nocb_deferred_wakeup(rdp);
trace_rcu_utilization(TPS("End RCU core"));
+
+ // If strict GPs, schedule an RCU reader in a clean environment.
+ if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+ queue_work_on(rdp->cpu, rcu_gp_wq, &rdp->strict_work);
}
static void rcu_core_si(struct softirq_action *h)
@@ -3022,6 +3079,12 @@ struct kfree_rcu_cpu_work {
* @monitor_todo: Tracks whether a @monitor_work delayed work is pending
* @initialized: The @rcu_work fields have been initialized
* @count: Number of objects for which GP not started
+ * @bkvcache:
+ * A simple cache list that contains objects for reuse purpose.
+ * In order to save some per-cpu space the list is singular.
+ * Even though it is lockless an access has to be protected by the
+ * per-cpu lock.
+ * @nr_bkv_objs: number of allocated objects at @bkvcache.
*
* This is a per-CPU structure. The reason that it is not included in
* the rcu_data structure is to permit this code to be extracted from
@@ -3037,14 +3100,6 @@ struct kfree_rcu_cpu {
bool monitor_todo;
bool initialized;
int count;
-
- /*
- * A simple cache list that contains objects for
- * reuse purpose. In order to save some per-cpu
- * space the list is singular. Even though it is
- * lockless an access has to be protected by the
- * per-cpu lock.
- */
struct llist_head bkvcache;
int nr_bkv_objs;
};
@@ -3445,7 +3500,7 @@ kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
unsigned long count = 0;
/* Snapshot count of all CPUs */
- for_each_online_cpu(cpu) {
+ for_each_possible_cpu(cpu) {
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
count += READ_ONCE(krcp->count);
@@ -3460,7 +3515,7 @@ kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
int cpu, freed = 0;
unsigned long flags;
- for_each_online_cpu(cpu) {
+ for_each_possible_cpu(cpu) {
int count;
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
@@ -3493,7 +3548,7 @@ void __init kfree_rcu_scheduler_running(void)
int cpu;
unsigned long flags;
- for_each_online_cpu(cpu) {
+ for_each_possible_cpu(cpu) {
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
raw_spin_lock_irqsave(&krcp->lock, flags);
@@ -3857,6 +3912,7 @@ rcu_boot_init_percpu_data(int cpu)
/* Set up local state, ensuring consistent view of global state. */
rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
+ INIT_WORK(&rdp->strict_work, strict_work_handler);
WARN_ON_ONCE(rdp->dynticks_nesting != 1);
WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp)));
rdp->rcu_ofl_gp_seq = rcu_state.gp_seq;
@@ -3975,8 +4031,6 @@ int rcutree_offline_cpu(unsigned int cpu)
return 0;
}
-static DEFINE_PER_CPU(int, rcu_cpu_started);
-
/*
* Mark the specified CPU as being online so that subsequent grace periods
* (both expedited and normal) will wait on it. Note that this means that
@@ -3996,12 +4050,11 @@ void rcu_cpu_starting(unsigned int cpu)
struct rcu_node *rnp;
bool newcpu;
- if (per_cpu(rcu_cpu_started, cpu))
+ rdp = per_cpu_ptr(&rcu_data, cpu);
+ if (rdp->cpu_started)
return;
+ rdp->cpu_started = true;
- per_cpu(rcu_cpu_started, cpu) = 1;
-
- rdp = per_cpu_ptr(&rcu_data, cpu);
rnp = rdp->mynode;
mask = rdp->grpmask;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
@@ -4061,7 +4114,7 @@ void rcu_report_dead(unsigned int cpu)
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
raw_spin_unlock(&rcu_state.ofl_lock);
- per_cpu(rcu_cpu_started, cpu) = 0;
+ rdp->cpu_started = false;
}
/*
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index c96ae351688b..e4f66b8f7c47 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -156,6 +156,7 @@ struct rcu_data {
bool beenonline; /* CPU online at least once. */
bool gpwrap; /* Possible ->gp_seq wrap. */
bool exp_deferred_qs; /* This CPU awaiting a deferred QS? */
+ bool cpu_started; /* RCU watching this onlining CPU. */
struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
unsigned long grpmask; /* Mask to apply to leaf qsmask. */
unsigned long ticks_this_gp; /* The number of scheduling-clock */
@@ -164,6 +165,7 @@ struct rcu_data {
/* period it is aware of. */
struct irq_work defer_qs_iw; /* Obtain later scheduler attention. */
bool defer_qs_iw_pending; /* Scheduler attention pending? */
+ struct work_struct strict_work; /* Schedule readers for strict GPs. */
/* 2) batch handling */
struct rcu_segcblist cblist; /* Segmented callback list, with */
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 1888c0eb1216..8760b6ead770 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -732,11 +732,9 @@ static void rcu_exp_need_qs(void)
/* Invoked on each online non-idle CPU for expedited quiescent state. */
static void rcu_exp_handler(void *unused)
{
- struct rcu_data *rdp;
- struct rcu_node *rnp;
+ struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
+ struct rcu_node *rnp = rdp->mynode;
- rdp = this_cpu_ptr(&rcu_data);
- rnp = rdp->mynode;
if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
return;
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 982fc5be5269..fd8a52e9a887 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -36,6 +36,8 @@ static void __init rcu_bootup_announce_oddness(void)
pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
if (IS_ENABLED(CONFIG_PROVE_RCU))
pr_info("\tRCU lockdep checking is enabled.\n");
+ if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+ pr_info("\tRCU strict (and thus non-scalable) grace periods enabled.\n");
if (RCU_NUM_LVLS >= 4)
pr_info("\tFour(or more)-level hierarchy is enabled.\n");
if (RCU_FANOUT_LEAF != 16)
@@ -374,6 +376,8 @@ void __rcu_read_lock(void)
rcu_preempt_read_enter();
if (IS_ENABLED(CONFIG_PROVE_LOCKING))
WARN_ON_ONCE(rcu_preempt_depth() > RCU_NEST_PMAX);
+ if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) && rcu_state.gp_kthread)
+ WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, true);
barrier(); /* critical section after entry code. */
}
EXPORT_SYMBOL_GPL(__rcu_read_lock);
@@ -455,8 +459,14 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
return;
}
t->rcu_read_unlock_special.s = 0;
- if (special.b.need_qs)
- rcu_qs();
+ if (special.b.need_qs) {
+ if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
+ rcu_report_qs_rdp(rdp);
+ udelay(rcu_unlock_delay);
+ } else {
+ rcu_qs();
+ }
+ }
/*
* Respond to a request by an expedited grace period for a
@@ -769,6 +779,24 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
#else /* #ifdef CONFIG_PREEMPT_RCU */
/*
+ * If strict grace periods are enabled, and if the calling
+ * __rcu_read_unlock() marks the beginning of a quiescent state, immediately
+ * report that quiescent state and, if requested, spin for a bit.
+ */
+void rcu_read_unlock_strict(void)
+{
+ struct rcu_data *rdp;
+
+ if (!IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ||
+ irqs_disabled() || preempt_count() || !rcu_state.gp_kthread)
+ return;
+ rdp = this_cpu_ptr(&rcu_data);
+ rcu_report_qs_rdp(rdp);
+ udelay(rcu_unlock_delay);
+}
+EXPORT_SYMBOL_GPL(rcu_read_unlock_strict);
+
+/*
* Tell them what RCU they are running.
*/
static void __init rcu_bootup_announce(void)
@@ -1926,6 +1954,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
* nearest grace period (if any) to wait for next. The CB kthreads
* and the global grace-period kthread are awakened if needed.
*/
+ WARN_ON_ONCE(my_rdp->nocb_gp_rdp != my_rdp);
for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_cb_rdp) {
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Check"));
rcu_nocb_lock_irqsave(rdp, flags);
@@ -2411,13 +2440,12 @@ static void show_rcu_nocb_state(struct rcu_data *rdp)
return;
waslocked = raw_spin_is_locked(&rdp->nocb_gp_lock);
- wastimer = timer_pending(&rdp->nocb_timer);
+ wastimer = timer_pending(&rdp->nocb_bypass_timer);
wassleep = swait_active(&rdp->nocb_gp_wq);
- if (!rdp->nocb_defer_wakeup && !rdp->nocb_gp_sleep &&
- !waslocked && !wastimer && !wassleep)
+ if (!rdp->nocb_gp_sleep && !waslocked && !wastimer && !wassleep)
return; /* Nothing untowards. */
- pr_info(" !!! %c%c%c%c %c\n",
+ pr_info(" nocb GP activity on CB-only CPU!!! %c%c%c%c %c\n",
"lL"[waslocked],
"dD"[!!rdp->nocb_defer_wakeup],
"tT"[wastimer],
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index b5d3b4794db4..0fde39b8daab 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -158,7 +158,7 @@ static void rcu_stall_kick_kthreads(void)
{
unsigned long j;
- if (!rcu_kick_kthreads)
+ if (!READ_ONCE(rcu_kick_kthreads))
return;
j = READ_ONCE(rcu_state.jiffies_kick_kthreads);
if (time_after(jiffies, j) && rcu_state.gp_kthread &&
@@ -580,7 +580,7 @@ static void check_cpu_stall(struct rcu_data *rdp)
unsigned long js;
struct rcu_node *rnp;
- if ((rcu_stall_is_suppressed() && !rcu_kick_kthreads) ||
+ if ((rcu_stall_is_suppressed() && !READ_ONCE(rcu_kick_kthreads)) ||
!rcu_gp_in_progress())
return;
rcu_stall_kick_kthreads();
@@ -623,7 +623,7 @@ static void check_cpu_stall(struct rcu_data *rdp)
/* We haven't checked in, so go dump stack. */
print_cpu_stall(gps);
- if (rcu_cpu_stall_ftrace_dump)
+ if (READ_ONCE(rcu_cpu_stall_ftrace_dump))
rcu_ftrace_dump(DUMP_ALL);
} else if (rcu_gp_in_progress() &&
@@ -632,7 +632,7 @@ static void check_cpu_stall(struct rcu_data *rdp)
/* They had a few time units to dump stack, so complain. */
print_other_cpu_stall(gs2, gps);
- if (rcu_cpu_stall_ftrace_dump)
+ if (READ_ONCE(rcu_cpu_stall_ftrace_dump))
rcu_ftrace_dump(DUMP_ALL);
}
}
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 3e0f4bcb558f..39334d2d2b37 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -53,19 +53,6 @@
#endif
#define MODULE_PARAM_PREFIX "rcupdate."
-#ifndef data_race
-#define data_race(expr) \
- ({ \
- expr; \
- })
-#endif
-#ifndef ASSERT_EXCLUSIVE_WRITER
-#define ASSERT_EXCLUSIVE_WRITER(var) do { } while (0)
-#endif
-#ifndef ASSERT_EXCLUSIVE_ACCESS
-#define ASSERT_EXCLUSIVE_ACCESS(var) do { } while (0)
-#endif
-
#ifndef CONFIG_TINY_RCU
module_param(rcu_expedited, int, 0);
module_param(rcu_normal, int, 0);
diff --git a/kernel/relay.c b/kernel/relay.c
index fb4e0c530c08..b08d936d5fa7 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1002,7 +1002,7 @@ static int relay_file_read_avail(struct rchan_buf *buf)
size_t subbuf_size = buf->chan->subbuf_size;
size_t n_subbufs = buf->chan->n_subbufs;
size_t produced = buf->subbufs_produced;
- size_t consumed = buf->subbufs_consumed;
+ size_t consumed;
relay_file_read_consume(buf, 0, 0);
diff --git a/kernel/resource.c b/kernel/resource.c
index db6582b6cf42..6d227a570f38 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1248,7 +1248,6 @@ EXPORT_SYMBOL(__release_region);
#ifdef CONFIG_MEMORY_HOTREMOVE
/**
* release_mem_region_adjustable - release a previously reserved memory region
- * @parent: parent resource descriptor
* @start: resource start address
* @size: resource region size
*
@@ -1266,21 +1265,28 @@ EXPORT_SYMBOL(__release_region);
* assumes that all children remain in the lower address entry for
* simplicity. Enhance this logic when necessary.
*/
-int release_mem_region_adjustable(struct resource *parent,
- resource_size_t start, resource_size_t size)
+void release_mem_region_adjustable(resource_size_t start, resource_size_t size)
{
+ struct resource *parent = &iomem_resource;
+ struct resource *new_res = NULL;
+ bool alloc_nofail = false;
struct resource **p;
struct resource *res;
- struct resource *new_res;
resource_size_t end;
- int ret = -EINVAL;
end = start + size - 1;
- if ((start < parent->start) || (end > parent->end))
- return ret;
+ if (WARN_ON_ONCE((start < parent->start) || (end > parent->end)))
+ return;
- /* The alloc_resource() result gets checked later */
- new_res = alloc_resource(GFP_KERNEL);
+ /*
+ * We free up quite a lot of memory on memory hotunplug (esp., memap),
+ * just before releasing the region. This is highly unlikely to
+ * fail - let's play save and make it never fail as the caller cannot
+ * perform any error handling (e.g., trying to re-add memory will fail
+ * similarly).
+ */
+retry:
+ new_res = alloc_resource(GFP_KERNEL | (alloc_nofail ? __GFP_NOFAIL : 0));
p = &parent->child;
write_lock(&resource_lock);
@@ -1306,7 +1312,6 @@ int release_mem_region_adjustable(struct resource *parent,
* so if we are dealing with them, let us just back off here.
*/
if (!(res->flags & IORESOURCE_SYSRAM)) {
- ret = 0;
break;
}
@@ -1323,20 +1328,23 @@ int release_mem_region_adjustable(struct resource *parent,
/* free the whole entry */
*p = res->sibling;
free_resource(res);
- ret = 0;
} else if (res->start == start && res->end != end) {
/* adjust the start */
- ret = __adjust_resource(res, end + 1,
- res->end - end);
+ WARN_ON_ONCE(__adjust_resource(res, end + 1,
+ res->end - end));
} else if (res->start != start && res->end == end) {
/* adjust the end */
- ret = __adjust_resource(res, res->start,
- start - res->start);
+ WARN_ON_ONCE(__adjust_resource(res, res->start,
+ start - res->start));
} else {
- /* split into two entries */
+ /* split into two entries - we need a new resource */
if (!new_res) {
- ret = -ENOMEM;
- break;
+ new_res = alloc_resource(GFP_ATOMIC);
+ if (!new_res) {
+ alloc_nofail = true;
+ write_unlock(&resource_lock);
+ goto retry;
+ }
}
new_res->name = res->name;
new_res->start = end + 1;
@@ -1347,9 +1355,8 @@ int release_mem_region_adjustable(struct resource *parent,
new_res->sibling = res->sibling;
new_res->child = NULL;
- ret = __adjust_resource(res, res->start,
- start - res->start);
- if (ret)
+ if (WARN_ON_ONCE(__adjust_resource(res, res->start,
+ start - res->start)))
break;
res->sibling = new_res;
new_res = NULL;
@@ -1360,10 +1367,69 @@ int release_mem_region_adjustable(struct resource *parent,
write_unlock(&resource_lock);
free_resource(new_res);
- return ret;
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
+#ifdef CONFIG_MEMORY_HOTPLUG
+static bool system_ram_resources_mergeable(struct resource *r1,
+ struct resource *r2)
+{
+ /* We assume either r1 or r2 is IORESOURCE_SYSRAM_MERGEABLE. */
+ return r1->flags == r2->flags && r1->end + 1 == r2->start &&
+ r1->name == r2->name && r1->desc == r2->desc &&
+ !r1->child && !r2->child;
+}
+
+/*
+ * merge_system_ram_resource - mark the System RAM resource mergeable and try to
+ * merge it with adjacent, mergeable resources
+ * @res: resource descriptor
+ *
+ * This interface is intended for memory hotplug, whereby lots of contiguous
+ * system ram resources are added (e.g., via add_memory*()) by a driver, and
+ * the actual resource boundaries are not of interest (e.g., it might be
+ * relevant for DIMMs). Only resources that are marked mergeable, that have the
+ * same parent, and that don't have any children are considered. All mergeable
+ * resources must be immutable during the request.
+ *
+ * Note:
+ * - The caller has to make sure that no pointers to resources that are
+ * marked mergeable are used anymore after this call - the resource might
+ * be freed and the pointer might be stale!
+ * - release_mem_region_adjustable() will split on demand on memory hotunplug
+ */
+void merge_system_ram_resource(struct resource *res)
+{
+ const unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+ struct resource *cur;
+
+ if (WARN_ON_ONCE((res->flags & flags) != flags))
+ return;
+
+ write_lock(&resource_lock);
+ res->flags |= IORESOURCE_SYSRAM_MERGEABLE;
+
+ /* Try to merge with next item in the list. */
+ cur = res->sibling;
+ if (cur && system_ram_resources_mergeable(res, cur)) {
+ res->end = cur->end;
+ res->sibling = cur->sibling;
+ free_resource(cur);
+ }
+
+ /* Try to merge with previous item in the list. */
+ cur = res->parent->child;
+ while (cur && cur->sibling != res)
+ cur = cur->sibling;
+ if (cur && system_ram_resources_mergeable(cur, res)) {
+ cur->end = res->end;
+ cur->sibling = res->sibling;
+ free_resource(res);
+ }
+ write_unlock(&resource_lock);
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
/*
* Managed region resource
*/
diff --git a/kernel/scftorture.c b/kernel/scftorture.c
new file mode 100644
index 000000000000..554a521ee235
--- /dev/null
+++ b/kernel/scftorture.c
@@ -0,0 +1,575 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Torture test for smp_call_function() and friends.
+//
+// Copyright (C) Facebook, 2020.
+//
+// Author: Paul E. McKenney <paulmck@kernel.org>
+
+#define pr_fmt(fmt) fmt
+
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/completion.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kthread.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/notifier.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/rcupdate_trace.h>
+#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/stat.h>
+#include <linux/srcu.h>
+#include <linux/slab.h>
+#include <linux/torture.h>
+#include <linux/types.h>
+
+#define SCFTORT_STRING "scftorture"
+#define SCFTORT_FLAG SCFTORT_STRING ": "
+
+#define SCFTORTOUT(s, x...) \
+ pr_alert(SCFTORT_FLAG s, ## x)
+
+#define VERBOSE_SCFTORTOUT(s, x...) \
+ do { if (verbose) pr_alert(SCFTORT_FLAG s, ## x); } while (0)
+
+#define VERBOSE_SCFTORTOUT_ERRSTRING(s, x...) \
+ do { if (verbose) pr_alert(SCFTORT_FLAG "!!! " s, ## x); } while (0)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Paul E. McKenney <paulmck@kernel.org>");
+
+// Wait until there are multiple CPUs before starting test.
+torture_param(int, holdoff, IS_BUILTIN(CONFIG_SCF_TORTURE_TEST) ? 10 : 0,
+ "Holdoff time before test start (s)");
+torture_param(int, longwait, 0, "Include ridiculously long waits? (seconds)");
+torture_param(int, nthreads, -1, "# threads, defaults to -1 for all CPUs.");
+torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
+torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (s), 0=disable");
+torture_param(int, shutdown_secs, 0, "Shutdown time (ms), <= zero to disable.");
+torture_param(int, stat_interval, 60, "Number of seconds between stats printk()s.");
+torture_param(int, stutter_cpus, 5, "Number of jiffies to change CPUs under test, 0=disable");
+torture_param(bool, use_cpus_read_lock, 0, "Use cpus_read_lock() to exclude CPU hotplug.");
+torture_param(int, verbose, 0, "Enable verbose debugging printk()s");
+torture_param(int, weight_single, -1, "Testing weight for single-CPU no-wait operations.");
+torture_param(int, weight_single_wait, -1, "Testing weight for single-CPU operations.");
+torture_param(int, weight_many, -1, "Testing weight for multi-CPU no-wait operations.");
+torture_param(int, weight_many_wait, -1, "Testing weight for multi-CPU operations.");
+torture_param(int, weight_all, -1, "Testing weight for all-CPU no-wait operations.");
+torture_param(int, weight_all_wait, -1, "Testing weight for all-CPU operations.");
+
+char *torture_type = "";
+
+#ifdef MODULE
+# define SCFTORT_SHUTDOWN 0
+#else
+# define SCFTORT_SHUTDOWN 1
+#endif
+
+torture_param(bool, shutdown, SCFTORT_SHUTDOWN, "Shutdown at end of torture test.");
+
+struct scf_statistics {
+ struct task_struct *task;
+ int cpu;
+ long long n_single;
+ long long n_single_ofl;
+ long long n_single_wait;
+ long long n_single_wait_ofl;
+ long long n_many;
+ long long n_many_wait;
+ long long n_all;
+ long long n_all_wait;
+};
+
+static struct scf_statistics *scf_stats_p;
+static struct task_struct *scf_torture_stats_task;
+static DEFINE_PER_CPU(long long, scf_invoked_count);
+
+// Data for random primitive selection
+#define SCF_PRIM_SINGLE 0
+#define SCF_PRIM_MANY 1
+#define SCF_PRIM_ALL 2
+#define SCF_NPRIMS (2 * 3) // Need wait and no-wait versions of each.
+
+static char *scf_prim_name[] = {
+ "smp_call_function_single",
+ "smp_call_function_many",
+ "smp_call_function",
+};
+
+struct scf_selector {
+ unsigned long scfs_weight;
+ int scfs_prim;
+ bool scfs_wait;
+};
+static struct scf_selector scf_sel_array[SCF_NPRIMS];
+static int scf_sel_array_len;
+static unsigned long scf_sel_totweight;
+
+// Communicate between caller and handler.
+struct scf_check {
+ bool scfc_in;
+ bool scfc_out;
+ int scfc_cpu; // -1 for not _single().
+ bool scfc_wait;
+};
+
+// Use to wait for all threads to start.
+static atomic_t n_started;
+static atomic_t n_errs;
+static atomic_t n_mb_in_errs;
+static atomic_t n_mb_out_errs;
+static atomic_t n_alloc_errs;
+static bool scfdone;
+static char *bangstr = "";
+
+static DEFINE_TORTURE_RANDOM_PERCPU(scf_torture_rand);
+
+// Print torture statistics. Caller must ensure serialization.
+static void scf_torture_stats_print(void)
+{
+ int cpu;
+ int i;
+ long long invoked_count = 0;
+ bool isdone = READ_ONCE(scfdone);
+ struct scf_statistics scfs = {};
+
+ for_each_possible_cpu(cpu)
+ invoked_count += data_race(per_cpu(scf_invoked_count, cpu));
+ for (i = 0; i < nthreads; i++) {
+ scfs.n_single += scf_stats_p[i].n_single;
+ scfs.n_single_ofl += scf_stats_p[i].n_single_ofl;
+ scfs.n_single_wait += scf_stats_p[i].n_single_wait;
+ scfs.n_single_wait_ofl += scf_stats_p[i].n_single_wait_ofl;
+ scfs.n_many += scf_stats_p[i].n_many;
+ scfs.n_many_wait += scf_stats_p[i].n_many_wait;
+ scfs.n_all += scf_stats_p[i].n_all;
+ scfs.n_all_wait += scf_stats_p[i].n_all_wait;
+ }
+ if (atomic_read(&n_errs) || atomic_read(&n_mb_in_errs) ||
+ atomic_read(&n_mb_out_errs) || atomic_read(&n_alloc_errs))
+ bangstr = "!!! ";
+ pr_alert("%s %sscf_invoked_count %s: %lld single: %lld/%lld single_ofl: %lld/%lld many: %lld/%lld all: %lld/%lld ",
+ SCFTORT_FLAG, bangstr, isdone ? "VER" : "ver", invoked_count,
+ scfs.n_single, scfs.n_single_wait, scfs.n_single_ofl, scfs.n_single_wait_ofl,
+ scfs.n_many, scfs.n_many_wait, scfs.n_all, scfs.n_all_wait);
+ torture_onoff_stats();
+ pr_cont("ste: %d stnmie: %d stnmoe: %d staf: %d\n", atomic_read(&n_errs),
+ atomic_read(&n_mb_in_errs), atomic_read(&n_mb_out_errs),
+ atomic_read(&n_alloc_errs));
+}
+
+// Periodically prints torture statistics, if periodic statistics printing
+// was specified via the stat_interval module parameter.
+static int
+scf_torture_stats(void *arg)
+{
+ VERBOSE_TOROUT_STRING("scf_torture_stats task started");
+ do {
+ schedule_timeout_interruptible(stat_interval * HZ);
+ scf_torture_stats_print();
+ torture_shutdown_absorb("scf_torture_stats");
+ } while (!torture_must_stop());
+ torture_kthread_stopping("scf_torture_stats");
+ return 0;
+}
+
+// Add a primitive to the scf_sel_array[].
+static void scf_sel_add(unsigned long weight, int prim, bool wait)
+{
+ struct scf_selector *scfsp = &scf_sel_array[scf_sel_array_len];
+
+ // If no weight, if array would overflow, if computing three-place
+ // percentages would overflow, or if the scf_prim_name[] array would
+ // overflow, don't bother. In the last three two cases, complain.
+ if (!weight ||
+ WARN_ON_ONCE(scf_sel_array_len >= ARRAY_SIZE(scf_sel_array)) ||
+ WARN_ON_ONCE(0 - 100000 * weight <= 100000 * scf_sel_totweight) ||
+ WARN_ON_ONCE(prim >= ARRAY_SIZE(scf_prim_name)))
+ return;
+ scf_sel_totweight += weight;
+ scfsp->scfs_weight = scf_sel_totweight;
+ scfsp->scfs_prim = prim;
+ scfsp->scfs_wait = wait;
+ scf_sel_array_len++;
+}
+
+// Dump out weighting percentages for scf_prim_name[] array.
+static void scf_sel_dump(void)
+{
+ int i;
+ unsigned long oldw = 0;
+ struct scf_selector *scfsp;
+ unsigned long w;
+
+ for (i = 0; i < scf_sel_array_len; i++) {
+ scfsp = &scf_sel_array[i];
+ w = (scfsp->scfs_weight - oldw) * 100000 / scf_sel_totweight;
+ pr_info("%s: %3lu.%03lu %s(%s)\n", __func__, w / 1000, w % 1000,
+ scf_prim_name[scfsp->scfs_prim],
+ scfsp->scfs_wait ? "wait" : "nowait");
+ oldw = scfsp->scfs_weight;
+ }
+}
+
+// Randomly pick a primitive and wait/nowait, based on weightings.
+static struct scf_selector *scf_sel_rand(struct torture_random_state *trsp)
+{
+ int i;
+ unsigned long w = torture_random(trsp) % (scf_sel_totweight + 1);
+
+ for (i = 0; i < scf_sel_array_len; i++)
+ if (scf_sel_array[i].scfs_weight >= w)
+ return &scf_sel_array[i];
+ WARN_ON_ONCE(1);
+ return &scf_sel_array[0];
+}
+
+// Update statistics and occasionally burn up mass quantities of CPU time,
+// if told to do so via scftorture.longwait. Otherwise, occasionally burn
+// a little bit.
+static void scf_handler(void *scfc_in)
+{
+ int i;
+ int j;
+ unsigned long r = torture_random(this_cpu_ptr(&scf_torture_rand));
+ struct scf_check *scfcp = scfc_in;
+
+ if (likely(scfcp)) {
+ WRITE_ONCE(scfcp->scfc_out, false); // For multiple receivers.
+ if (WARN_ON_ONCE(unlikely(!READ_ONCE(scfcp->scfc_in))))
+ atomic_inc(&n_mb_in_errs);
+ }
+ this_cpu_inc(scf_invoked_count);
+ if (longwait <= 0) {
+ if (!(r & 0xffc0))
+ udelay(r & 0x3f);
+ goto out;
+ }
+ if (r & 0xfff)
+ goto out;
+ r = (r >> 12);
+ if (longwait <= 0) {
+ udelay((r & 0xff) + 1);
+ goto out;
+ }
+ r = r % longwait + 1;
+ for (i = 0; i < r; i++) {
+ for (j = 0; j < 1000; j++) {
+ udelay(1000);
+ cpu_relax();
+ }
+ }
+out:
+ if (unlikely(!scfcp))
+ return;
+ if (scfcp->scfc_wait)
+ WRITE_ONCE(scfcp->scfc_out, true);
+ else
+ kfree(scfcp);
+}
+
+// As above, but check for correct CPU.
+static void scf_handler_1(void *scfc_in)
+{
+ struct scf_check *scfcp = scfc_in;
+
+ if (likely(scfcp) && WARN_ONCE(smp_processor_id() != scfcp->scfc_cpu, "%s: Wanted CPU %d got CPU %d\n", __func__, scfcp->scfc_cpu, smp_processor_id())) {
+ atomic_inc(&n_errs);
+ }
+ scf_handler(scfcp);
+}
+
+// Randomly do an smp_call_function*() invocation.
+static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_random_state *trsp)
+{
+ uintptr_t cpu;
+ int ret = 0;
+ struct scf_check *scfcp = NULL;
+ struct scf_selector *scfsp = scf_sel_rand(trsp);
+
+ if (use_cpus_read_lock)
+ cpus_read_lock();
+ else
+ preempt_disable();
+ if (scfsp->scfs_prim == SCF_PRIM_SINGLE || scfsp->scfs_wait) {
+ scfcp = kmalloc(sizeof(*scfcp), GFP_ATOMIC);
+ if (WARN_ON_ONCE(!scfcp)) {
+ atomic_inc(&n_alloc_errs);
+ } else {
+ scfcp->scfc_cpu = -1;
+ scfcp->scfc_wait = scfsp->scfs_wait;
+ scfcp->scfc_out = false;
+ }
+ }
+ switch (scfsp->scfs_prim) {
+ case SCF_PRIM_SINGLE:
+ cpu = torture_random(trsp) % nr_cpu_ids;
+ if (scfsp->scfs_wait)
+ scfp->n_single_wait++;
+ else
+ scfp->n_single++;
+ if (scfcp) {
+ scfcp->scfc_cpu = cpu;
+ barrier(); // Prevent race-reduction compiler optimizations.
+ scfcp->scfc_in = true;
+ }
+ ret = smp_call_function_single(cpu, scf_handler_1, (void *)scfcp, scfsp->scfs_wait);
+ if (ret) {
+ if (scfsp->scfs_wait)
+ scfp->n_single_wait_ofl++;
+ else
+ scfp->n_single_ofl++;
+ kfree(scfcp);
+ scfcp = NULL;
+ }
+ break;
+ case SCF_PRIM_MANY:
+ if (scfsp->scfs_wait)
+ scfp->n_many_wait++;
+ else
+ scfp->n_many++;
+ if (scfcp) {
+ barrier(); // Prevent race-reduction compiler optimizations.
+ scfcp->scfc_in = true;
+ }
+ smp_call_function_many(cpu_online_mask, scf_handler, scfcp, scfsp->scfs_wait);
+ break;
+ case SCF_PRIM_ALL:
+ if (scfsp->scfs_wait)
+ scfp->n_all_wait++;
+ else
+ scfp->n_all++;
+ if (scfcp) {
+ barrier(); // Prevent race-reduction compiler optimizations.
+ scfcp->scfc_in = true;
+ }
+ smp_call_function(scf_handler, scfcp, scfsp->scfs_wait);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ if (scfcp)
+ scfcp->scfc_out = true;
+ }
+ if (scfcp && scfsp->scfs_wait) {
+ if (WARN_ON_ONCE((num_online_cpus() > 1 || scfsp->scfs_prim == SCF_PRIM_SINGLE) &&
+ !scfcp->scfc_out))
+ atomic_inc(&n_mb_out_errs); // Leak rather than trash!
+ else
+ kfree(scfcp);
+ barrier(); // Prevent race-reduction compiler optimizations.
+ }
+ if (use_cpus_read_lock)
+ cpus_read_unlock();
+ else
+ preempt_enable();
+ if (!(torture_random(trsp) & 0xfff))
+ schedule_timeout_uninterruptible(1);
+}
+
+// SCF test kthread. Repeatedly does calls to members of the
+// smp_call_function() family of functions.
+static int scftorture_invoker(void *arg)
+{
+ int cpu;
+ DEFINE_TORTURE_RANDOM(rand);
+ struct scf_statistics *scfp = (struct scf_statistics *)arg;
+ bool was_offline = false;
+
+ VERBOSE_SCFTORTOUT("scftorture_invoker %d: task started", scfp->cpu);
+ cpu = scfp->cpu % nr_cpu_ids;
+ set_cpus_allowed_ptr(current, cpumask_of(cpu));
+ set_user_nice(current, MAX_NICE);
+ if (holdoff)
+ schedule_timeout_interruptible(holdoff * HZ);
+
+ VERBOSE_SCFTORTOUT("scftorture_invoker %d: Waiting for all SCF torturers from cpu %d", scfp->cpu, smp_processor_id());
+
+ // Make sure that the CPU is affinitized appropriately during testing.
+ WARN_ON_ONCE(smp_processor_id() != scfp->cpu);
+
+ if (!atomic_dec_return(&n_started))
+ while (atomic_read_acquire(&n_started)) {
+ if (torture_must_stop()) {
+ VERBOSE_SCFTORTOUT("scftorture_invoker %d ended before starting", scfp->cpu);
+ goto end;
+ }
+ schedule_timeout_uninterruptible(1);
+ }
+
+ VERBOSE_SCFTORTOUT("scftorture_invoker %d started", scfp->cpu);
+
+ do {
+ scftorture_invoke_one(scfp, &rand);
+ while (cpu_is_offline(cpu) && !torture_must_stop()) {
+ schedule_timeout_interruptible(HZ / 5);
+ was_offline = true;
+ }
+ if (was_offline) {
+ set_cpus_allowed_ptr(current, cpumask_of(cpu));
+ was_offline = false;
+ }
+ cond_resched();
+ } while (!torture_must_stop());
+
+ VERBOSE_SCFTORTOUT("scftorture_invoker %d ended", scfp->cpu);
+end:
+ torture_kthread_stopping("scftorture_invoker");
+ return 0;
+}
+
+static void
+scftorture_print_module_parms(const char *tag)
+{
+ pr_alert(SCFTORT_FLAG
+ "--- %s: verbose=%d holdoff=%d longwait=%d nthreads=%d onoff_holdoff=%d onoff_interval=%d shutdown_secs=%d stat_interval=%d stutter_cpus=%d use_cpus_read_lock=%d, weight_single=%d, weight_single_wait=%d, weight_many=%d, weight_many_wait=%d, weight_all=%d, weight_all_wait=%d\n", tag,
+ verbose, holdoff, longwait, nthreads, onoff_holdoff, onoff_interval, shutdown, stat_interval, stutter_cpus, use_cpus_read_lock, weight_single, weight_single_wait, weight_many, weight_many_wait, weight_all, weight_all_wait);
+}
+
+static void scf_cleanup_handler(void *unused)
+{
+}
+
+static void scf_torture_cleanup(void)
+{
+ int i;
+
+ if (torture_cleanup_begin())
+ return;
+
+ WRITE_ONCE(scfdone, true);
+ if (nthreads)
+ for (i = 0; i < nthreads; i++)
+ torture_stop_kthread("scftorture_invoker", scf_stats_p[i].task);
+ else
+ goto end;
+ smp_call_function(scf_cleanup_handler, NULL, 0);
+ torture_stop_kthread(scf_torture_stats, scf_torture_stats_task);
+ scf_torture_stats_print(); // -After- the stats thread is stopped!
+ kfree(scf_stats_p); // -After- the last stats print has completed!
+ scf_stats_p = NULL;
+
+ if (atomic_read(&n_errs) || atomic_read(&n_mb_in_errs) || atomic_read(&n_mb_out_errs))
+ scftorture_print_module_parms("End of test: FAILURE");
+ else if (torture_onoff_failures())
+ scftorture_print_module_parms("End of test: LOCK_HOTPLUG");
+ else
+ scftorture_print_module_parms("End of test: SUCCESS");
+
+end:
+ torture_cleanup_end();
+}
+
+static int __init scf_torture_init(void)
+{
+ long i;
+ int firsterr = 0;
+ unsigned long weight_single1 = weight_single;
+ unsigned long weight_single_wait1 = weight_single_wait;
+ unsigned long weight_many1 = weight_many;
+ unsigned long weight_many_wait1 = weight_many_wait;
+ unsigned long weight_all1 = weight_all;
+ unsigned long weight_all_wait1 = weight_all_wait;
+
+ if (!torture_init_begin(SCFTORT_STRING, verbose))
+ return -EBUSY;
+
+ scftorture_print_module_parms("Start of test");
+
+ if (weight_single == -1 && weight_single_wait == -1 &&
+ weight_many == -1 && weight_many_wait == -1 &&
+ weight_all == -1 && weight_all_wait == -1) {
+ weight_single1 = 2 * nr_cpu_ids;
+ weight_single_wait1 = 2 * nr_cpu_ids;
+ weight_many1 = 2;
+ weight_many_wait1 = 2;
+ weight_all1 = 1;
+ weight_all_wait1 = 1;
+ } else {
+ if (weight_single == -1)
+ weight_single1 = 0;
+ if (weight_single_wait == -1)
+ weight_single_wait1 = 0;
+ if (weight_many == -1)
+ weight_many1 = 0;
+ if (weight_many_wait == -1)
+ weight_many_wait1 = 0;
+ if (weight_all == -1)
+ weight_all1 = 0;
+ if (weight_all_wait == -1)
+ weight_all_wait1 = 0;
+ }
+ if (weight_single1 == 0 && weight_single_wait1 == 0 &&
+ weight_many1 == 0 && weight_many_wait1 == 0 &&
+ weight_all1 == 0 && weight_all_wait1 == 0) {
+ VERBOSE_SCFTORTOUT_ERRSTRING("all zero weights makes no sense");
+ firsterr = -EINVAL;
+ goto unwind;
+ }
+ scf_sel_add(weight_single1, SCF_PRIM_SINGLE, false);
+ scf_sel_add(weight_single_wait1, SCF_PRIM_SINGLE, true);
+ scf_sel_add(weight_many1, SCF_PRIM_MANY, false);
+ scf_sel_add(weight_many_wait1, SCF_PRIM_MANY, true);
+ scf_sel_add(weight_all1, SCF_PRIM_ALL, false);
+ scf_sel_add(weight_all_wait1, SCF_PRIM_ALL, true);
+ scf_sel_dump();
+
+ if (onoff_interval > 0) {
+ firsterr = torture_onoff_init(onoff_holdoff * HZ, onoff_interval, NULL);
+ if (firsterr)
+ goto unwind;
+ }
+ if (shutdown_secs > 0) {
+ firsterr = torture_shutdown_init(shutdown_secs, scf_torture_cleanup);
+ if (firsterr)
+ goto unwind;
+ }
+
+ // Worker tasks invoking smp_call_function().
+ if (nthreads < 0)
+ nthreads = num_online_cpus();
+ scf_stats_p = kcalloc(nthreads, sizeof(scf_stats_p[0]), GFP_KERNEL);
+ if (!scf_stats_p) {
+ VERBOSE_SCFTORTOUT_ERRSTRING("out of memory");
+ firsterr = -ENOMEM;
+ goto unwind;
+ }
+
+ VERBOSE_SCFTORTOUT("Starting %d smp_call_function() threads\n", nthreads);
+
+ atomic_set(&n_started, nthreads);
+ for (i = 0; i < nthreads; i++) {
+ scf_stats_p[i].cpu = i;
+ firsterr = torture_create_kthread(scftorture_invoker, (void *)&scf_stats_p[i],
+ scf_stats_p[i].task);
+ if (firsterr)
+ goto unwind;
+ }
+ if (stat_interval > 0) {
+ firsterr = torture_create_kthread(scf_torture_stats, NULL, scf_torture_stats_task);
+ if (firsterr)
+ goto unwind;
+ }
+
+ torture_init_end();
+ return 0;
+
+unwind:
+ torture_init_end();
+ scf_torture_cleanup();
+ return firsterr;
+}
+
+module_init(scf_torture_init);
+module_exit(scf_torture_cleanup);
diff --git a/kernel/smp.c b/kernel/smp.c
index d0ae8eb6bf8b..4d17501433be 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -20,6 +20,9 @@
#include <linux/sched.h>
#include <linux/sched/idle.h>
#include <linux/hypervisor.h>
+#include <linux/sched/clock.h>
+#include <linux/nmi.h>
+#include <linux/sched/debug.h>
#include "smpboot.h"
#include "sched/smp.h"
@@ -96,6 +99,103 @@ void __init call_function_init(void)
smpcfd_prepare_cpu(smp_processor_id());
}
+#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
+
+static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
+static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
+static DEFINE_PER_CPU(void *, cur_csd_info);
+
+#define CSD_LOCK_TIMEOUT (5ULL * NSEC_PER_SEC)
+static atomic_t csd_bug_count = ATOMIC_INIT(0);
+
+/* Record current CSD work for current CPU, NULL to erase. */
+static void csd_lock_record(call_single_data_t *csd)
+{
+ if (!csd) {
+ smp_mb(); /* NULL cur_csd after unlock. */
+ __this_cpu_write(cur_csd, NULL);
+ return;
+ }
+ __this_cpu_write(cur_csd_func, csd->func);
+ __this_cpu_write(cur_csd_info, csd->info);
+ smp_wmb(); /* func and info before csd. */
+ __this_cpu_write(cur_csd, csd);
+ smp_mb(); /* Update cur_csd before function call. */
+ /* Or before unlock, as the case may be. */
+}
+
+static __always_inline int csd_lock_wait_getcpu(call_single_data_t *csd)
+{
+ unsigned int csd_type;
+
+ csd_type = CSD_TYPE(csd);
+ if (csd_type == CSD_TYPE_ASYNC || csd_type == CSD_TYPE_SYNC)
+ return csd->dst; /* Other CSD_TYPE_ values might not have ->dst. */
+ return -1;
+}
+
+/*
+ * Complain if too much time spent waiting. Note that only
+ * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
+ * so waiting on other types gets much less information.
+ */
+static __always_inline bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id)
+{
+ int cpu = -1;
+ int cpux;
+ bool firsttime;
+ u64 ts2, ts_delta;
+ call_single_data_t *cpu_cur_csd;
+ unsigned int flags = READ_ONCE(csd->flags);
+
+ if (!(flags & CSD_FLAG_LOCK)) {
+ if (!unlikely(*bug_id))
+ return true;
+ cpu = csd_lock_wait_getcpu(csd);
+ pr_alert("csd: CSD lock (#%d) got unstuck on CPU#%02d, CPU#%02d released the lock.\n",
+ *bug_id, raw_smp_processor_id(), cpu);
+ return true;
+ }
+
+ ts2 = sched_clock();
+ ts_delta = ts2 - *ts1;
+ if (likely(ts_delta <= CSD_LOCK_TIMEOUT))
+ return false;
+
+ firsttime = !*bug_id;
+ if (firsttime)
+ *bug_id = atomic_inc_return(&csd_bug_count);
+ cpu = csd_lock_wait_getcpu(csd);
+ if (WARN_ONCE(cpu < 0 || cpu >= nr_cpu_ids, "%s: cpu = %d\n", __func__, cpu))
+ cpux = 0;
+ else
+ cpux = cpu;
+ cpu_cur_csd = smp_load_acquire(&per_cpu(cur_csd, cpux)); /* Before func and info. */
+ pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %llu ns for CPU#%02d %pS(%ps).\n",
+ firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts2 - ts0,
+ cpu, csd->func, csd->info);
+ if (cpu_cur_csd && csd != cpu_cur_csd) {
+ pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n",
+ *bug_id, READ_ONCE(per_cpu(cur_csd_func, cpux)),
+ READ_ONCE(per_cpu(cur_csd_info, cpux)));
+ } else {
+ pr_alert("\tcsd: CSD lock (#%d) %s.\n",
+ *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request");
+ }
+ if (cpu >= 0) {
+ if (!trigger_single_cpu_backtrace(cpu))
+ dump_cpu_task(cpu);
+ if (!cpu_cur_csd) {
+ pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu);
+ arch_send_call_function_single_ipi(cpu);
+ }
+ }
+ dump_stack();
+ *ts1 = ts2;
+
+ return false;
+}
+
/*
* csd_lock/csd_unlock used to serialize access to per-cpu csd resources
*
@@ -105,8 +205,28 @@ void __init call_function_init(void)
*/
static __always_inline void csd_lock_wait(call_single_data_t *csd)
{
+ int bug_id = 0;
+ u64 ts0, ts1;
+
+ ts1 = ts0 = sched_clock();
+ for (;;) {
+ if (csd_lock_wait_toolong(csd, ts0, &ts1, &bug_id))
+ break;
+ cpu_relax();
+ }
+ smp_acquire__after_ctrl_dep();
+}
+
+#else
+static void csd_lock_record(call_single_data_t *csd)
+{
+}
+
+static __always_inline void csd_lock_wait(call_single_data_t *csd)
+{
smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK));
}
+#endif
static __always_inline void csd_lock(call_single_data_t *csd)
{
@@ -166,9 +286,11 @@ static int generic_exec_single(int cpu, call_single_data_t *csd)
* We can unlock early even for the synchronous on-stack case,
* since we're doing this from the same CPU..
*/
+ csd_lock_record(csd);
csd_unlock(csd);
local_irq_save(flags);
func(info);
+ csd_lock_record(NULL);
local_irq_restore(flags);
return 0;
}
@@ -268,8 +390,10 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
entry = &csd_next->llist;
}
+ csd_lock_record(csd);
func(info);
csd_unlock(csd);
+ csd_lock_record(NULL);
} else {
prev = &csd->llist;
}
@@ -296,8 +420,10 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
smp_call_func_t func = csd->func;
void *info = csd->info;
+ csd_lock_record(csd);
csd_unlock(csd);
func(info);
+ csd_lock_record(NULL);
} else if (type == CSD_TYPE_IRQ_WORK) {
irq_work_single(csd);
}
@@ -375,6 +501,10 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
csd->func = func;
csd->info = info;
+#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
+ csd->src = smp_processor_id();
+ csd->dst = cpu;
+#endif
err = generic_exec_single(cpu, csd);
@@ -540,6 +670,10 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
csd->flags |= CSD_TYPE_SYNC;
csd->func = func;
csd->info = info;
+#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
+ csd->src = smp_processor_id();
+ csd->dst = cpu;
+#endif
if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
__cpumask_set_cpu(cpu, cfd->cpumask_ipi);
}
@@ -741,7 +875,7 @@ EXPORT_SYMBOL(on_each_cpu_mask);
* for all the required CPUs to finish. This may include the local
* processor.
* @cond_func: A callback function that is passed a cpu id and
- * the the info parameter. The function is called
+ * the info parameter. The function is called
* with preemption disabled. The function should
* return a blooean value indicating whether to IPI
* the specified CPU.
diff --git a/kernel/sys.c b/kernel/sys.c
index ab6c409b1159..6401880dff74 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2034,7 +2034,7 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
* VMAs already unmapped and kernel uses these members for statistics
* output in procfs mostly, except
*
- * - @start_brk/@brk which are used in do_brk but kernel lookups
+ * - @start_brk/@brk which are used in do_brk_flags but kernel lookups
* for VMAs when updating these memvers so anything wrong written
* here cause kernel to swear at userspace program but won't lead
* to any problem in kernel itself
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index c925d1e1777e..f27ac94d5fa7 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -280,6 +280,7 @@ COND_SYSCALL(mlockall);
COND_SYSCALL(munlockall);
COND_SYSCALL(mincore);
COND_SYSCALL(madvise);
+COND_SYSCALL(process_madvise);
COND_SYSCALL(remap_file_pages);
COND_SYSCALL(mbind);
COND_SYSCALL_COMPAT(mbind);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index f0199a4ba1ad..81632cd5e3b7 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -927,7 +927,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
if (ratelimit < 10 &&
(local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
- pr_warn("NOHZ: local_softirq_pending %02x\n",
+ pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n",
(unsigned int) local_softirq_pending());
ratelimit++;
}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 29ca60f058ef..47a71f96e5bc 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2498,7 +2498,7 @@ __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
* for enabling events at boot. We want to enable events before
* the filesystem is initialized.
*/
-static __init int
+static int
__trace_early_add_new_event(struct trace_event_call *call,
struct trace_array *tr)
{
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 87804e0371fe..e703d5d9cbe8 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -515,7 +515,7 @@ EXPORT_SYMBOL(from_kgid_munged);
*
* When there is no mapping defined for the user-namespace projid
* pair INVALID_PROJID is returned. Callers are expected to test
- * for and handle handle INVALID_PROJID being returned. INVALID_PROJID
+ * for and handle INVALID_PROJID being returned. INVALID_PROJID
* may be tested for using projid_valid().
*/
kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ac088ce6059b..437935e7a199 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1212,11 +1212,14 @@ out_put:
* stable state - idle, on timer or on worklist.
*
* Return:
+ *
+ * ======== ================================================================
* 1 if @work was pending and we successfully stole PENDING
* 0 if @work was idle and we claimed PENDING
* -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry
* -ENOENT if someone else is canceling @work, this state may persist
* for arbitrarily long
+ * ======== ================================================================
*
* Note:
* On >= 0 return, the caller owns @work's PENDING bit. To avoid getting
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 491789a793ae..66d44d35cc97 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1367,6 +1367,27 @@ config WW_MUTEX_SELFTEST
Say M if you want these self tests to build as a module.
Say N if you are unsure.
+config SCF_TORTURE_TEST
+ tristate "torture tests for smp_call_function*()"
+ depends on DEBUG_KERNEL
+ select TORTURE_TEST
+ help
+ This option provides a kernel module that runs torture tests
+ on the smp_call_function() family of primitives. The kernel
+ module may be built after the fact on the running kernel to
+ be tested, if desired.
+
+config CSD_LOCK_WAIT_DEBUG
+ bool "Debugging for csd_lock_wait(), called from smp_call_function*()"
+ depends on DEBUG_KERNEL
+ depends on 64BIT
+ default n
+ help
+ This option enables debug prints when CPUs are slow to respond
+ to the smp_call_function*() IPI wrappers. These debug prints
+ include the IPI handler function currently executing (if any)
+ and relevant stack traces.
+
endmenu # lock debugging
config TRACE_IRQFLAGS
@@ -1768,6 +1789,13 @@ config FAIL_PAGE_ALLOC
help
Provide fault-injection capability for alloc_pages().
+config FAULT_INJECTION_USERCOPY
+ bool "Fault injection capability for usercopy functions"
+ depends on FAULT_INJECTION
+ help
+ Provides fault-injection capability to inject failures
+ in usercopy functions (copy_from_user(), get_user(), ...).
+
config FAIL_MAKE_REQUEST
bool "Fault-injection capability for disk IO"
depends on FAULT_INJECTION && BLOCK
@@ -2035,13 +2063,6 @@ config TEST_BITMAP
If unsure, say N.
-config TEST_BITFIELD
- tristate "Test bitfield functions at runtime"
- help
- Enable this option to test the bitfield functions at boot.
-
- If unsure, say N.
-
config TEST_UUID
tristate "Test functions located in the uuid module at runtime"
@@ -2191,6 +2212,22 @@ config TEST_SYSCTL
If unsure, say N.
+config BITFIELD_KUNIT
+ tristate "KUnit test bitfield functions at runtime"
+ depends on KUNIT
+ help
+ Enable this option to test the bitfield functions at boot.
+
+ KUnit tests run during boot and output the results to the debug log
+ in TAP format (http://testanything.org/). Only useful for kernel devs
+ running the KUnit test harness, and not intended for inclusion into a
+ production build.
+
+ For more information on KUnit and unit tests in general please refer
+ to the KUnit documentation in Documentation/dev-tools/kunit/.
+
+ If unsure, say N.
+
config SYSCTL_KUNIT_TEST
tristate "KUnit test for sysctl" if !KUNIT_ALL_TESTS
depends on KUNIT
diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb
index 256f2486f9bd..05dae05b6cc9 100644
--- a/lib/Kconfig.kgdb
+++ b/lib/Kconfig.kgdb
@@ -24,6 +24,21 @@ menuconfig KGDB
if KGDB
+config KGDB_HONOUR_BLOCKLIST
+ bool "KGDB: use kprobe blocklist to prohibit unsafe breakpoints"
+ depends on HAVE_KPROBES
+ depends on MODULES
+ select KPROBES
+ default y
+ help
+ If set to Y the debug core will use the kprobe blocklist to
+ identify symbols where it is unsafe to set breakpoints.
+ In particular this disallows instrumentation of functions
+ called during debug trap handling and thus makes it very
+ difficult to inadvertently provoke recursive trap handling.
+
+ If unsure, say Y.
+
config KGDB_SERIAL_CONSOLE
tristate "KGDB: use kgdb over the serial console"
select CONSOLE_POLL
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index 774315de555a..58f8d03d037b 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -47,6 +47,20 @@ config UBSAN_BOUNDS
to the {str,mem}*cpy() family of functions (that is addressed
by CONFIG_FORTIFY_SOURCE).
+config UBSAN_LOCAL_BOUNDS
+ bool "Perform array local bounds checking"
+ depends on UBSAN_TRAP
+ depends on CC_IS_CLANG
+ depends on !UBSAN_KCOV_BROKEN
+ help
+ This option enables -fsanitize=local-bounds which traps when an
+ exception/error is detected. Therefore, it should be enabled only
+ if trapping is expected.
+ Enabling this option detects errors due to accesses through a
+ pointer that is derived from an object of a statically-known size,
+ where an added offset (which may not be known statically) is
+ out-of-bounds.
+
config UBSAN_MISC
bool "Enable all other Undefined Behavior sanity checks"
default UBSAN
diff --git a/lib/Makefile b/lib/Makefile
index 49a2a9e36224..ce45af50983a 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -87,7 +87,6 @@ obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
obj-$(CONFIG_TEST_PRINTF) += test_printf.o
obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
obj-$(CONFIG_TEST_STRSCPY) += test_strscpy.o
-obj-$(CONFIG_TEST_BITFIELD) += test_bitfield.o
obj-$(CONFIG_TEST_UUID) += test_uuid.o
obj-$(CONFIG_TEST_XARRAY) += test_xarray.o
obj-$(CONFIG_TEST_PARMAN) += test_parman.o
@@ -210,6 +209,7 @@ obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o
obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
+obj-$(CONFIG_FAULT_INJECTION_USERCOPY) += fault-inject-usercopy.o
obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o
obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJECT) += netdev-notifier-error-inject.o
@@ -348,6 +348,7 @@ obj-$(CONFIG_OBJAGG) += objagg.o
obj-$(CONFIG_PLDMFW) += pldmfw/
# KUnit tests
+obj-$(CONFIG_BITFIELD_KUNIT) += bitfield_kunit.o
obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o
obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o
obj-$(CONFIG_BITS_TEST) += test_bits.o
diff --git a/lib/test_bitfield.c b/lib/bitfield_kunit.c
index 5b8f4108662d..1473d8b4bf0f 100644
--- a/lib/test_bitfield.c
+++ b/lib/bitfield_kunit.c
@@ -5,8 +5,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/kernel.h>
-#include <linux/module.h>
+#include <kunit/test.h>
#include <linux/bitfield.h>
#define CHECK_ENC_GET_U(tp, v, field, res) do { \
@@ -14,13 +13,11 @@
u##tp _res; \
\
_res = u##tp##_encode_bits(v, field); \
- if (_res != res) { \
- pr_warn("u" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != " #res "\n",\
- (u64)_res); \
- return -EINVAL; \
- } \
- if (u##tp##_get_bits(_res, field) != v) \
- return -EINVAL; \
+ KUNIT_ASSERT_FALSE_MSG(context, _res != res, \
+ "u" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != " #res "\n", \
+ (u64)_res); \
+ KUNIT_ASSERT_FALSE(context, \
+ u##tp##_get_bits(_res, field) != v); \
} \
} while (0)
@@ -29,14 +26,13 @@
__le##tp _res; \
\
_res = le##tp##_encode_bits(v, field); \
- if (_res != cpu_to_le##tp(res)) { \
- pr_warn("le" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != 0x%llx\n",\
- (u64)le##tp##_to_cpu(_res), \
- (u64)(res)); \
- return -EINVAL; \
- } \
- if (le##tp##_get_bits(_res, field) != v) \
- return -EINVAL; \
+ KUNIT_ASSERT_FALSE_MSG(context, \
+ _res != cpu_to_le##tp(res), \
+ "le" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != 0x%llx",\
+ (u64)le##tp##_to_cpu(_res), \
+ (u64)(res)); \
+ KUNIT_ASSERT_FALSE(context, \
+ le##tp##_get_bits(_res, field) != v);\
} \
} while (0)
@@ -45,14 +41,13 @@
__be##tp _res; \
\
_res = be##tp##_encode_bits(v, field); \
- if (_res != cpu_to_be##tp(res)) { \
- pr_warn("be" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != 0x%llx\n",\
- (u64)be##tp##_to_cpu(_res), \
- (u64)(res)); \
- return -EINVAL; \
- } \
- if (be##tp##_get_bits(_res, field) != v) \
- return -EINVAL; \
+ KUNIT_ASSERT_FALSE_MSG(context, \
+ _res != cpu_to_be##tp(res), \
+ "be" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != 0x%llx", \
+ (u64)be##tp##_to_cpu(_res), \
+ (u64)(res)); \
+ KUNIT_ASSERT_FALSE(context, \
+ be##tp##_get_bits(_res, field) != v);\
} \
} while (0)
@@ -62,7 +57,7 @@
CHECK_ENC_GET_BE(tp, v, field, res); \
} while (0)
-static int test_constants(void)
+static void __init test_bitfields_constants(struct kunit *context)
{
/*
* NOTE
@@ -95,19 +90,17 @@ static int test_constants(void)
CHECK_ENC_GET(64, 7, 0x00f0000000000000ull, 0x0070000000000000ull);
CHECK_ENC_GET(64, 14, 0x0f00000000000000ull, 0x0e00000000000000ull);
CHECK_ENC_GET(64, 15, 0xf000000000000000ull, 0xf000000000000000ull);
-
- return 0;
}
#define CHECK(tp, mask) do { \
u64 v; \
\
for (v = 0; v < 1 << hweight32(mask); v++) \
- if (tp##_encode_bits(v, mask) != v << __ffs64(mask)) \
- return -EINVAL; \
+ KUNIT_ASSERT_FALSE(context, \
+ tp##_encode_bits(v, mask) != v << __ffs64(mask));\
} while (0)
-static int test_variables(void)
+static void __init test_bitfields_variables(struct kunit *context)
{
CHECK(u8, 0x0f);
CHECK(u8, 0xf0);
@@ -130,39 +123,32 @@ static int test_variables(void)
CHECK(u64, 0x000000007f000000ull);
CHECK(u64, 0x0000000018000000ull);
CHECK(u64, 0x0000001f8000000ull);
-
- return 0;
}
-static int __init test_bitfields(void)
-{
- int ret = test_constants();
-
- if (ret) {
- pr_warn("constant tests failed!\n");
- return ret;
- }
-
- ret = test_variables();
- if (ret) {
- pr_warn("variable tests failed!\n");
- return ret;
- }
-
#ifdef TEST_BITFIELD_COMPILE
+static void __init test_bitfields_compile(struct kunit *context)
+{
/* these should fail compilation */
CHECK_ENC_GET(16, 16, 0x0f00, 0x1000);
u32_encode_bits(7, 0x06000000);
/* this should at least give a warning */
u16_encode_bits(0, 0x60000);
+}
#endif
- pr_info("tests passed\n");
+static struct kunit_case __refdata bitfields_test_cases[] = {
+ KUNIT_CASE(test_bitfields_constants),
+ KUNIT_CASE(test_bitfields_variables),
+ {}
+};
- return 0;
-}
-module_init(test_bitfields)
+static struct kunit_suite bitfields_test_suite = {
+ .name = "bitfields",
+ .test_cases = bitfields_test_cases,
+};
+
+kunit_test_suites(&bitfields_test_suite);
MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>");
MODULE_LICENSE("GPL");
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 61394890788e..75006c4036e9 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -23,7 +23,7 @@
/**
* DOC: bitmap introduction
*
- * bitmaps provide an array of bits, implemented using an an
+ * bitmaps provide an array of bits, implemented using an
* array of unsigned longs. The number of valid bits in a
* given bitmap does _not_ need to be an exact multiple of
* BITS_PER_LONG.
diff --git a/lib/crc32.c b/lib/crc32.c
index 35a03d03f973..2a68dfd3b96c 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -331,7 +331,7 @@ static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p,
return crc;
}
-#if CRC_LE_BITS == 1
+#if CRC_BE_BITS == 1
u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
{
return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE);
diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c
index f9628f3924ce..c72c865032fa 100644
--- a/lib/decompress_bunzip2.c
+++ b/lib/decompress_bunzip2.c
@@ -390,7 +390,7 @@ static int INIT get_next_block(struct bunzip_data *bd)
j = (bd->inbufBits >> bd->inbufBitCount)&
((1 << hufGroup->maxLen)-1);
got_huff_bits:
- /* Figure how how many bits are in next symbol and
+ /* Figure how many bits are in next symbol and
* unget extras */
i = hufGroup->minLen;
while (j > limit[i])
diff --git a/lib/decompress_unzstd.c b/lib/decompress_unzstd.c
index 0ad2c15479ed..790abc472f5b 100644
--- a/lib/decompress_unzstd.c
+++ b/lib/decompress_unzstd.c
@@ -178,8 +178,13 @@ static int INIT __unzstd(unsigned char *in_buf, long in_len,
int err;
size_t ret;
+ /*
+ * ZSTD decompression code won't be happy if the buffer size is so big
+ * that its end address overflows. When the size is not provided, make
+ * it as big as possible without having the end address overflow.
+ */
if (out_len == 0)
- out_len = LONG_MAX; /* no limit */
+ out_len = UINTPTR_MAX - (uintptr_t)out_buf;
if (fill == NULL && flush == NULL)
/*
diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c
index e659a027036e..fde0aa244148 100644
--- a/lib/dynamic_queue_limits.c
+++ b/lib/dynamic_queue_limits.c
@@ -60,8 +60,8 @@ void dql_completed(struct dql *dql, unsigned int count)
* A decrease is only considered if the queue has been busy in
* the whole interval (the check above).
*
- * If there is slack, the amount of execess data queued above
- * the the amount needed to prevent starvation, the queue limit
+ * If there is slack, the amount of excess data queued above
+ * the amount needed to prevent starvation, the queue limit
* can be decreased. To avoid hysteresis we consider the
* minimum amount of slack found over several iterations of the
* completion routine.
diff --git a/lib/earlycpio.c b/lib/earlycpio.c
index c001e084829e..e83628882001 100644
--- a/lib/earlycpio.c
+++ b/lib/earlycpio.c
@@ -42,7 +42,7 @@ enum cpio_fields {
/**
* cpio_data find_cpio_data - Search for files in an uncompressed cpio
* @path: The directory to search for, including a slash at the end
- * @data: Pointer to the the cpio archive or a header inside
+ * @data: Pointer to the cpio archive or a header inside
* @len: Remaining length of the cpio based on data pointer
* @nextoff: When a matching file is found, this is the offset from the
* beginning of the cpio to the beginning of the next file, not the
diff --git a/lib/fault-inject-usercopy.c b/lib/fault-inject-usercopy.c
new file mode 100644
index 000000000000..77558b6c29ca
--- /dev/null
+++ b/lib/fault-inject-usercopy.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/fault-inject.h>
+#include <linux/fault-inject-usercopy.h>
+
+static struct {
+ struct fault_attr attr;
+} fail_usercopy = {
+ .attr = FAULT_ATTR_INITIALIZER,
+};
+
+static int __init setup_fail_usercopy(char *str)
+{
+ return setup_fault_attr(&fail_usercopy.attr, str);
+}
+__setup("fail_usercopy=", setup_fail_usercopy);
+
+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+
+static int __init fail_usercopy_debugfs(void)
+{
+ struct dentry *dir;
+
+ dir = fault_create_debugfs_attr("fail_usercopy", NULL,
+ &fail_usercopy.attr);
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
+
+ return 0;
+}
+
+late_initcall(fail_usercopy_debugfs);
+
+#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
+
+bool should_fail_usercopy(void)
+{
+ return should_fail(&fail_usercopy.attr, 1);
+}
+EXPORT_SYMBOL_GPL(should_fail_usercopy);
diff --git a/lib/find_bit.c b/lib/find_bit.c
index 49f875f1baf7..4a8751010d59 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -16,6 +16,7 @@
#include <linux/bitmap.h>
#include <linux/export.h>
#include <linux/kernel.h>
+#include <linux/minmax.h>
#if !defined(find_next_bit) || !defined(find_next_zero_bit) || \
!defined(find_next_bit_le) || !defined(find_next_zero_bit_le) || \
diff --git a/lib/hexdump.c b/lib/hexdump.c
index 147133f8eb2f..9301578f98e8 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -7,6 +7,7 @@
#include <linux/ctype.h>
#include <linux/errno.h>
#include <linux/kernel.h>
+#include <linux/minmax.h>
#include <linux/export.h>
#include <asm/unaligned.h>
diff --git a/lib/idr.c b/lib/idr.c
index c2cf2c52bbde..f4ab4f4aa3c7 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -372,7 +372,8 @@ EXPORT_SYMBOL(idr_replace);
* Allocate an ID between @min and @max, inclusive. The allocated ID will
* not exceed %INT_MAX, even if @max is larger.
*
- * Context: Any context.
+ * Context: Any context. It is safe to call this function without
+ * locking in your code.
* Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
* or %-ENOSPC if there are no free IDs.
*/
@@ -470,6 +471,7 @@ alloc:
goto retry;
nospc:
xas_unlock_irqrestore(&xas, flags);
+ kfree(alloc);
return -ENOSPC;
}
EXPORT_SYMBOL(ida_alloc_range);
@@ -479,7 +481,8 @@ EXPORT_SYMBOL(ida_alloc_range);
* @ida: IDA handle.
* @id: Previously allocated ID.
*
- * Context: Any context.
+ * Context: Any context. It is safe to call this function without
+ * locking in your code.
*/
void ida_free(struct ida *ida, unsigned int id)
{
@@ -531,7 +534,8 @@ EXPORT_SYMBOL(ida_free);
* or freed. If the IDA is already empty, there is no need to call this
* function.
*
- * Context: Any context.
+ * Context: Any context. It is safe to call this function without
+ * locking in your code.
*/
void ida_destroy(struct ida *ida)
{
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 14cae2584db4..1635111c5bd2 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -2,6 +2,7 @@
#include <crypto/hash.h>
#include <linux/export.h>
#include <linux/bvec.h>
+#include <linux/fault-inject-usercopy.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
@@ -140,6 +141,8 @@
static int copyout(void __user *to, const void *from, size_t n)
{
+ if (should_fail_usercopy())
+ return n;
if (access_ok(to, n)) {
instrument_copy_to_user(to, from, n);
n = raw_copy_to_user(to, from, n);
@@ -149,6 +152,8 @@ static int copyout(void __user *to, const void *from, size_t n)
static int copyin(void *to, const void __user *from, size_t n)
{
+ if (should_fail_usercopy())
+ return n;
if (access_ok(from, n)) {
instrument_copy_from_user(to, from, n);
n = raw_copy_from_user(to, from, n);
diff --git a/lib/kunit/Makefile b/lib/kunit/Makefile
index 724b94311ca3..c49f4ffb6273 100644
--- a/lib/kunit/Makefile
+++ b/lib/kunit/Makefile
@@ -3,7 +3,8 @@ obj-$(CONFIG_KUNIT) += kunit.o
kunit-objs += test.o \
string-stream.o \
assert.o \
- try-catch.o
+ try-catch.o \
+ executor.o
ifeq ($(CONFIG_KUNIT_DEBUGFS),y)
kunit-objs += debugfs.o
diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c
new file mode 100644
index 000000000000..a95742a4ece7
--- /dev/null
+++ b/lib/kunit/executor.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <kunit/test.h>
+
+/*
+ * These symbols point to the .kunit_test_suites section and are defined in
+ * include/asm-generic/vmlinux.lds.h, and consequently must be extern.
+ */
+extern struct kunit_suite * const * const __kunit_suites_start[];
+extern struct kunit_suite * const * const __kunit_suites_end[];
+
+#if IS_BUILTIN(CONFIG_KUNIT)
+
+static void kunit_print_tap_header(void)
+{
+ struct kunit_suite * const * const *suites, * const *subsuite;
+ int num_of_suites = 0;
+
+ for (suites = __kunit_suites_start;
+ suites < __kunit_suites_end;
+ suites++)
+ for (subsuite = *suites; *subsuite != NULL; subsuite++)
+ num_of_suites++;
+
+ pr_info("TAP version 14\n");
+ pr_info("1..%d\n", num_of_suites);
+}
+
+int kunit_run_all_tests(void)
+{
+ struct kunit_suite * const * const *suites;
+
+ kunit_print_tap_header();
+
+ for (suites = __kunit_suites_start;
+ suites < __kunit_suites_end;
+ suites++)
+ __kunit_test_suites_init(*suites);
+
+ return 0;
+}
+
+#endif /* IS_BUILTIN(CONFIG_KUNIT) */
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index dcc35fd30d95..750704abe89a 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -16,16 +16,6 @@
#include "string-stream.h"
#include "try-catch-impl.h"
-static void kunit_print_tap_version(void)
-{
- static bool kunit_has_printed_tap_version;
-
- if (!kunit_has_printed_tap_version) {
- pr_info("TAP version 14\n");
- kunit_has_printed_tap_version = true;
- }
-}
-
/*
* Append formatted message to log, size of which is limited to
* KUNIT_LOG_SIZE bytes (including null terminating byte).
@@ -65,7 +55,6 @@ EXPORT_SYMBOL_GPL(kunit_suite_num_test_cases);
static void kunit_print_subtest_start(struct kunit_suite *suite)
{
- kunit_print_tap_version();
kunit_log(KERN_INFO, suite, KUNIT_SUBTEST_INDENT "# Subtest: %s",
suite->name);
kunit_log(KERN_INFO, suite, KUNIT_SUBTEST_INDENT "1..%zd",
@@ -381,7 +370,7 @@ static void kunit_init_suite(struct kunit_suite *suite)
kunit_debugfs_create_suite(suite);
}
-int __kunit_test_suites_init(struct kunit_suite **suites)
+int __kunit_test_suites_init(struct kunit_suite * const * const suites)
{
unsigned int i;
diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c
index 77ab839644c5..5ca0d815a95d 100644
--- a/lib/libcrc32c.c
+++ b/lib/libcrc32c.c
@@ -12,7 +12,7 @@
* pages = {},
* month = {June},
*}
- * Used by the iSCSI driver, possibly others, and derived from the
+ * Used by the iSCSI driver, possibly others, and derived from
* the iscsi-crc.c module of the linux-iscsi driver at
* http://linux-iscsi.sourceforge.net.
*
diff --git a/lib/math/rational.c b/lib/math/rational.c
index df75c8809693..9781d521963d 100644
--- a/lib/math/rational.c
+++ b/lib/math/rational.c
@@ -11,7 +11,7 @@
#include <linux/rational.h>
#include <linux/compiler.h>
#include <linux/export.h>
-#include <linux/kernel.h>
+#include <linux/minmax.h>
/*
* calculate best rational approximation for a given fraction
diff --git a/lib/math/reciprocal_div.c b/lib/math/reciprocal_div.c
index bf043258fa00..32436dd4171e 100644
--- a/lib/math/reciprocal_div.c
+++ b/lib/math/reciprocal_div.c
@@ -4,6 +4,7 @@
#include <asm/div64.h>
#include <linux/reciprocal_div.h>
#include <linux/export.h>
+#include <linux/minmax.h>
/*
* For a description of the algorithm please have a look at
diff --git a/lib/mpi/mpi-bit.c b/lib/mpi/mpi-bit.c
index a5119a2bcdd4..142b680835df 100644
--- a/lib/mpi/mpi-bit.c
+++ b/lib/mpi/mpi-bit.c
@@ -1,4 +1,4 @@
-/* mpi-bit.c - MPI bit level fucntions
+/* mpi-bit.c - MPI bit level functions
* Copyright (C) 1998, 1999 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
index 15ca78e1c7d4..8abe1870dba4 100644
--- a/lib/nmi_backtrace.c
+++ b/lib/nmi_backtrace.c
@@ -85,12 +85,16 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
put_cpu();
}
+// Dump stacks even for idle CPUs.
+static bool backtrace_idle;
+module_param(backtrace_idle, bool, 0644);
+
bool nmi_cpu_backtrace(struct pt_regs *regs)
{
int cpu = smp_processor_id();
if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
- if (regs && cpu_in_idle(instruction_pointer(regs))) {
+ if (!READ_ONCE(backtrace_idle) && regs && cpu_in_idle(instruction_pointer(regs))) {
pr_warn("NMI backtrace for cpu %d skipped: idling at %pS\n",
cpu, (void *)instruction_pointer(regs));
} else {
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index f61689a96e85..00f666d94486 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -85,7 +85,7 @@ void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
preempt_disable();
count = __this_cpu_read(*fbc->counters) + amount;
- if (count >= batch || count <= -batch) {
+ if (abs(count) >= batch) {
unsigned long flags;
raw_spin_lock_irqsave(&fbc->lock, flags);
fbc->count += count;
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 8e4a3a4397f2..3a4da11b804d 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -20,7 +20,6 @@
#include <linux/kernel.h>
#include <linux/kmemleak.h>
#include <linux/percpu.h>
-#include <linux/local_lock.h>
#include <linux/preempt.h> /* in_interrupt() */
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
@@ -325,7 +324,7 @@ static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
int ret = -ENOMEM;
/*
- * Nodes preloaded by one cgroup can be be used by another cgroup, so
+ * Nodes preloaded by one cgroup can be used by another cgroup, so
* they should never be accounted to any particular memory cgroup.
*/
gfp_mask &= ~__GFP_ACCOUNT;
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 5d63a8857f36..9a4992dc8e8c 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -365,6 +365,37 @@ int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
}
EXPORT_SYMBOL(sg_alloc_table);
+static struct scatterlist *get_next_sg(struct sg_table *table,
+ struct scatterlist *cur,
+ unsigned long needed_sges,
+ gfp_t gfp_mask)
+{
+ struct scatterlist *new_sg, *next_sg;
+ unsigned int alloc_size;
+
+ if (cur) {
+ next_sg = sg_next(cur);
+ /* Check if last entry should be keeped for chainning */
+ if (!sg_is_last(next_sg) || needed_sges == 1)
+ return next_sg;
+ }
+
+ alloc_size = min_t(unsigned long, needed_sges, SG_MAX_SINGLE_ALLOC);
+ new_sg = sg_kmalloc(alloc_size, gfp_mask);
+ if (!new_sg)
+ return ERR_PTR(-ENOMEM);
+ sg_init_table(new_sg, alloc_size);
+ if (cur) {
+ __sg_chain(next_sg, new_sg);
+ table->orig_nents += alloc_size - 1;
+ } else {
+ table->sgl = new_sg;
+ table->orig_nents = alloc_size;
+ table->nents = 0;
+ }
+ return new_sg;
+}
+
/**
* __sg_alloc_table_from_pages - Allocate and initialize an sg table from
* an array of pages
@@ -373,30 +404,69 @@ EXPORT_SYMBOL(sg_alloc_table);
* @n_pages: Number of pages in the pages array
* @offset: Offset from start of the first page to the start of a buffer
* @size: Number of valid bytes in the buffer (after offset)
- * @max_segment: Maximum size of a scatterlist node in bytes (page aligned)
+ * @max_segment: Maximum size of a scatterlist element in bytes
+ * @prv: Last populated sge in sgt
+ * @left_pages: Left pages caller have to set after this call
* @gfp_mask: GFP allocation mask
*
- * Description:
- * Allocate and initialize an sg table from a list of pages. Contiguous
- * ranges of the pages are squashed into a single scatterlist node up to the
- * maximum size specified in @max_segment. An user may provide an offset at a
- * start and a size of valid data in a buffer specified by the page array.
- * The returned sg table is released by sg_free_table.
+ * Description:
+ * If @prv is NULL, allocate and initialize an sg table from a list of pages,
+ * else reuse the scatterlist passed in at @prv.
+ * Contiguous ranges of the pages are squashed into a single scatterlist
+ * entry up to the maximum size specified in @max_segment. A user may
+ * provide an offset at a start and a size of valid data in a buffer
+ * specified by the page array.
*
* Returns:
- * 0 on success, negative error on failure
+ * Last SGE in sgt on success, PTR_ERR on otherwise.
+ * The allocation in @sgt must be released by sg_free_table.
+ *
+ * Notes:
+ * If this function returns non-0 (eg failure), the caller must call
+ * sg_free_table() to cleanup any leftover allocations.
*/
-int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
- unsigned int n_pages, unsigned int offset,
- unsigned long size, unsigned int max_segment,
- gfp_t gfp_mask)
+struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt,
+ struct page **pages, unsigned int n_pages, unsigned int offset,
+ unsigned long size, unsigned int max_segment,
+ struct scatterlist *prv, unsigned int left_pages,
+ gfp_t gfp_mask)
{
- unsigned int chunks, cur_page, seg_len, i;
- int ret;
- struct scatterlist *s;
+ unsigned int chunks, cur_page, seg_len, i, prv_len = 0;
+ unsigned int added_nents = 0;
+ struct scatterlist *s = prv;
- if (WARN_ON(!max_segment || offset_in_page(max_segment)))
- return -EINVAL;
+ /*
+ * The algorithm below requires max_segment to be aligned to PAGE_SIZE
+ * otherwise it can overshoot.
+ */
+ max_segment = ALIGN_DOWN(max_segment, PAGE_SIZE);
+ if (WARN_ON(max_segment < PAGE_SIZE))
+ return ERR_PTR(-EINVAL);
+
+ if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && prv)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (prv) {
+ unsigned long paddr = (page_to_pfn(sg_page(prv)) * PAGE_SIZE +
+ prv->offset + prv->length) /
+ PAGE_SIZE;
+
+ if (WARN_ON(offset))
+ return ERR_PTR(-EINVAL);
+
+ /* Merge contiguous pages into the last SG */
+ prv_len = prv->length;
+ while (n_pages && page_to_pfn(pages[0]) == paddr) {
+ if (prv->length + PAGE_SIZE > max_segment)
+ break;
+ prv->length += PAGE_SIZE;
+ paddr++;
+ pages++;
+ n_pages--;
+ }
+ if (!n_pages)
+ goto out;
+ }
/* compute number of contiguous chunks */
chunks = 1;
@@ -410,13 +480,9 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
}
}
- ret = sg_alloc_table(sgt, chunks, gfp_mask);
- if (unlikely(ret))
- return ret;
-
/* merging chunks and putting them into the scatterlist */
cur_page = 0;
- for_each_sg(sgt->sgl, s, sgt->orig_nents, i) {
+ for (i = 0; i < chunks; i++) {
unsigned int j, chunk_size;
/* look for the end of the current chunk */
@@ -429,15 +495,30 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
break;
}
+ /* Pass how many chunks might be left */
+ s = get_next_sg(sgt, s, chunks - i + left_pages, gfp_mask);
+ if (IS_ERR(s)) {
+ /*
+ * Adjust entry length to be as before function was
+ * called.
+ */
+ if (prv)
+ prv->length = prv_len;
+ return s;
+ }
chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset;
sg_set_page(s, pages[cur_page],
min_t(unsigned long, size, chunk_size), offset);
+ added_nents++;
size -= chunk_size;
offset = 0;
cur_page = j;
}
-
- return 0;
+ sgt->nents += added_nents;
+out:
+ if (!left_pages)
+ sg_mark_end(s);
+ return s;
}
EXPORT_SYMBOL(__sg_alloc_table_from_pages);
@@ -465,8 +546,8 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
unsigned int n_pages, unsigned int offset,
unsigned long size, gfp_t gfp_mask)
{
- return __sg_alloc_table_from_pages(sgt, pages, n_pages, offset, size,
- SCATTERLIST_MAX_SEGMENT, gfp_mask);
+ return PTR_ERR_OR_ZERO(__sg_alloc_table_from_pages(sgt, pages, n_pages,
+ offset, size, UINT_MAX, NULL, 0, gfp_mask));
}
EXPORT_SYMBOL(sg_alloc_table_from_pages);
@@ -504,7 +585,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length,
nalloc++;
}
sgl = kmalloc_array(nalloc, sizeof(struct scatterlist),
- (gfp & ~GFP_DMA) | __GFP_ZERO);
+ gfp & ~GFP_DMA);
if (!sgl)
return NULL;
diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c
index 34696a348864..e6d5fcc2cdf3 100644
--- a/lib/strncpy_from_user.c
+++ b/lib/strncpy_from_user.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/compiler.h>
#include <linux/export.h>
+#include <linux/fault-inject-usercopy.h>
#include <linux/kasan-checks.h>
#include <linux/thread_info.h>
#include <linux/uaccess.h>
@@ -99,6 +100,8 @@ long strncpy_from_user(char *dst, const char __user *src, long count)
unsigned long max_addr, src_addr;
might_fault();
+ if (should_fail_usercopy())
+ return -EFAULT;
if (unlikely(count <= 0))
return 0;
diff --git a/lib/syscall.c b/lib/syscall.c
index fb328e7ccb08..8533d2fea2d7 100644
--- a/lib/syscall.c
+++ b/lib/syscall.c
@@ -44,7 +44,7 @@ static int collect_syscall(struct task_struct *target, struct syscall_info *info
* .data.instruction_pointer - filled with user PC
*
* If @target is blocked in a system call, returns zero with @info.data.nr
- * set to the the call's number and @info.data.args filled in with its
+ * set to the call's number and @info.data.args filled in with its
* arguments. Registers not used for system call arguments may not be available
* and it is not kosher to use &struct user_regset calls while the system
* call is still in progress. Note we may get this result if @target
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index e151a7f10519..80a78877bd93 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -461,7 +461,7 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice,
devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
if (!devmem)
- return -ENOMEM;
+ return false;
res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE,
"hmm_dmirror");
diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c
index 98bc92a91662..3750323973f4 100644
--- a/lib/test_sysctl.c
+++ b/lib/test_sysctl.c
@@ -16,7 +16,7 @@
*/
/*
- * This module provides an interface to the the proc sysctl interfaces. This
+ * This module provides an interface to the proc sysctl interfaces. This
* driver requires CONFIG_PROC_SYSCTL. It will not normally be loaded by the
* system unless explicitly requested by name. You can also build this driver
* into your kernel.
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index d4f97925dbd8..8294f43f4981 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -289,6 +289,27 @@ static noinline void check_xa_mark_2(struct xarray *xa)
xa_destroy(xa);
}
+static noinline void check_xa_mark_3(struct xarray *xa)
+{
+#ifdef CONFIG_XARRAY_MULTI
+ XA_STATE(xas, xa, 0x41);
+ void *entry;
+ int count = 0;
+
+ xa_store_order(xa, 0x40, 2, xa_mk_index(0x40), GFP_KERNEL);
+ xa_set_mark(xa, 0x41, XA_MARK_0);
+
+ rcu_read_lock();
+ xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_0) {
+ count++;
+ XA_BUG_ON(xa, entry != xa_mk_index(0x40));
+ }
+ XA_BUG_ON(xa, count != 1);
+ rcu_read_unlock();
+ xa_destroy(xa);
+#endif
+}
+
static noinline void check_xa_mark(struct xarray *xa)
{
unsigned long index;
@@ -297,6 +318,7 @@ static noinline void check_xa_mark(struct xarray *xa)
check_xa_mark_1(xa, index);
check_xa_mark_2(xa);
+ check_xa_mark_3(xa);
}
static noinline void check_xa_shrink(struct xarray *xa)
@@ -393,6 +415,9 @@ static noinline void check_cmpxchg(struct xarray *xa)
XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, FIVE, LOTS, GFP_KERNEL) != FIVE);
XA_BUG_ON(xa, xa_cmpxchg(xa, 5, FIVE, NULL, GFP_KERNEL) != NULL);
XA_BUG_ON(xa, xa_cmpxchg(xa, 5, NULL, FIVE, GFP_KERNEL) != NULL);
+ XA_BUG_ON(xa, xa_insert(xa, 5, FIVE, GFP_KERNEL) != -EBUSY);
+ XA_BUG_ON(xa, xa_cmpxchg(xa, 5, FIVE, NULL, GFP_KERNEL) != FIVE);
+ XA_BUG_ON(xa, xa_insert(xa, 5, FIVE, GFP_KERNEL) == -EBUSY);
xa_erase_index(xa, 12345678);
xa_erase_index(xa, 5);
XA_BUG_ON(xa, !xa_empty(xa));
@@ -1503,6 +1528,49 @@ static noinline void check_store_range(struct xarray *xa)
}
}
+#ifdef CONFIG_XARRAY_MULTI
+static void check_split_1(struct xarray *xa, unsigned long index,
+ unsigned int order)
+{
+ XA_STATE(xas, xa, index);
+ void *entry;
+ unsigned int i = 0;
+
+ xa_store_order(xa, index, order, xa, GFP_KERNEL);
+
+ xas_split_alloc(&xas, xa, order, GFP_KERNEL);
+ xas_lock(&xas);
+ xas_split(&xas, xa, order);
+ xas_unlock(&xas);
+
+ xa_for_each(xa, index, entry) {
+ XA_BUG_ON(xa, entry != xa);
+ i++;
+ }
+ XA_BUG_ON(xa, i != 1 << order);
+
+ xa_set_mark(xa, index, XA_MARK_0);
+ XA_BUG_ON(xa, !xa_get_mark(xa, index, XA_MARK_0));
+
+ xa_destroy(xa);
+}
+
+static noinline void check_split(struct xarray *xa)
+{
+ unsigned int order;
+
+ XA_BUG_ON(xa, !xa_empty(xa));
+
+ for (order = 1; order < 2 * XA_CHUNK_SHIFT; order++) {
+ check_split_1(xa, 0, order);
+ check_split_1(xa, 1UL << order, order);
+ check_split_1(xa, 3UL << order, order);
+ }
+}
+#else
+static void check_split(struct xarray *xa) { }
+#endif
+
static void check_align_1(struct xarray *xa, char *name)
{
int i;
@@ -1575,14 +1643,9 @@ static noinline void shadow_remove(struct xarray *xa)
xa_lock(xa);
while ((node = list_first_entry_or_null(&shadow_nodes,
struct xa_node, private_list))) {
- XA_STATE(xas, node->array, 0);
XA_BUG_ON(xa, node->array != xa);
list_del_init(&node->private_list);
- xas.xa_node = xa_parent_locked(node->array, node);
- xas.xa_offset = node->offset;
- xas.xa_shift = node->shift + XA_CHUNK_SHIFT;
- xas_set_update(&xas, test_update_node);
- xas_store(&xas, NULL);
+ xa_delete_node(node, test_update_node);
}
xa_unlock(xa);
}
@@ -1649,6 +1712,26 @@ static noinline void check_account(struct xarray *xa)
#endif
}
+static noinline void check_get_order(struct xarray *xa)
+{
+ unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1;
+ unsigned int order;
+ unsigned long i, j;
+
+ for (i = 0; i < 3; i++)
+ XA_BUG_ON(xa, xa_get_order(xa, i) != 0);
+
+ for (order = 0; order < max_order; order++) {
+ for (i = 0; i < 10; i++) {
+ xa_store_order(xa, i << order, order,
+ xa_mk_index(i << order), GFP_KERNEL);
+ for (j = i << order; j < (i + 1) << order; j++)
+ XA_BUG_ON(xa, xa_get_order(xa, j) != order);
+ xa_erase(xa, i << order);
+ }
+ }
+}
+
static noinline void check_destroy(struct xarray *xa)
{
unsigned long index;
@@ -1697,6 +1780,7 @@ static int xarray_checks(void)
check_reserve(&array);
check_reserve(&xa0);
check_multi_store(&array);
+ check_get_order(&array);
check_xa_alloc();
check_find(&array);
check_find_entry(&array);
@@ -1708,6 +1792,7 @@ static int xarray_checks(void)
check_store_range(&array);
check_store_iter(&array);
check_align(&xa0);
+ check_split(&array);
check_workingset(&array, 0);
check_workingset(&array, 64);
diff --git a/lib/usercopy.c b/lib/usercopy.c
index b26509f112f9..7413dd300516 100644
--- a/lib/usercopy.c
+++ b/lib/usercopy.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bitops.h>
+#include <linux/fault-inject-usercopy.h>
#include <linux/instrumented.h>
#include <linux/uaccess.h>
@@ -10,7 +11,7 @@ unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n
{
unsigned long res = n;
might_fault();
- if (likely(access_ok(from, n))) {
+ if (!should_fail_usercopy() && likely(access_ok(from, n))) {
instrument_copy_from_user(to, from, n);
res = raw_copy_from_user(to, from, n);
}
@@ -25,6 +26,8 @@ EXPORT_SYMBOL(_copy_from_user);
unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n)
{
might_fault();
+ if (should_fail_usercopy())
+ return n;
if (likely(access_ok(to, n))) {
instrument_copy_to_user(to, from, n);
n = raw_copy_to_user(to, from, n);
diff --git a/lib/xarray.c b/lib/xarray.c
index e9e641d3c0c3..5fa51614802a 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -266,13 +266,14 @@ static void xa_node_free(struct xa_node *node)
*/
static void xas_destroy(struct xa_state *xas)
{
- struct xa_node *node = xas->xa_alloc;
+ struct xa_node *next, *node = xas->xa_alloc;
- if (!node)
- return;
- XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
- kmem_cache_free(radix_tree_node_cachep, node);
- xas->xa_alloc = NULL;
+ while (node) {
+ XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
+ next = rcu_dereference_raw(node->parent);
+ radix_tree_node_rcu_free(&node->rcu_head);
+ xas->xa_alloc = node = next;
+ }
}
/**
@@ -304,6 +305,7 @@ bool xas_nomem(struct xa_state *xas, gfp_t gfp)
xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
if (!xas->xa_alloc)
return false;
+ xas->xa_alloc->parent = NULL;
XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
xas->xa_node = XAS_RESTART;
return true;
@@ -339,6 +341,7 @@ static bool __xas_nomem(struct xa_state *xas, gfp_t gfp)
}
if (!xas->xa_alloc)
return false;
+ xas->xa_alloc->parent = NULL;
XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
xas->xa_node = XAS_RESTART;
return true;
@@ -403,7 +406,7 @@ static unsigned long xas_size(const struct xa_state *xas)
/*
* Use this to calculate the maximum index that will need to be created
* in order to add the entry described by @xas. Because we cannot store a
- * multiple-index entry at index 0, the calculation is a little more complex
+ * multi-index entry at index 0, the calculation is a little more complex
* than you might expect.
*/
static unsigned long xas_max(struct xa_state *xas)
@@ -703,7 +706,7 @@ void xas_create_range(struct xa_state *xas)
unsigned char shift = xas->xa_shift;
unsigned char sibs = xas->xa_sibs;
- xas->xa_index |= ((sibs + 1) << shift) - 1;
+ xas->xa_index |= ((sibs + 1UL) << shift) - 1;
if (xas_is_node(xas) && xas->xa_node->shift == xas->xa_shift)
xas->xa_offset |= sibs;
xas->xa_shift = 0;
@@ -946,6 +949,153 @@ void xas_init_marks(const struct xa_state *xas)
}
EXPORT_SYMBOL_GPL(xas_init_marks);
+#ifdef CONFIG_XARRAY_MULTI
+static unsigned int node_get_marks(struct xa_node *node, unsigned int offset)
+{
+ unsigned int marks = 0;
+ xa_mark_t mark = XA_MARK_0;
+
+ for (;;) {
+ if (node_get_mark(node, offset, mark))
+ marks |= 1 << (__force unsigned int)mark;
+ if (mark == XA_MARK_MAX)
+ break;
+ mark_inc(mark);
+ }
+
+ return marks;
+}
+
+static void node_set_marks(struct xa_node *node, unsigned int offset,
+ struct xa_node *child, unsigned int marks)
+{
+ xa_mark_t mark = XA_MARK_0;
+
+ for (;;) {
+ if (marks & (1 << (__force unsigned int)mark)) {
+ node_set_mark(node, offset, mark);
+ if (child)
+ node_mark_all(child, mark);
+ }
+ if (mark == XA_MARK_MAX)
+ break;
+ mark_inc(mark);
+ }
+}
+
+/**
+ * xas_split_alloc() - Allocate memory for splitting an entry.
+ * @xas: XArray operation state.
+ * @entry: New entry which will be stored in the array.
+ * @order: New entry order.
+ * @gfp: Memory allocation flags.
+ *
+ * This function should be called before calling xas_split().
+ * If necessary, it will allocate new nodes (and fill them with @entry)
+ * to prepare for the upcoming split of an entry of @order size into
+ * entries of the order stored in the @xas.
+ *
+ * Context: May sleep if @gfp flags permit.
+ */
+void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order,
+ gfp_t gfp)
+{
+ unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1;
+ unsigned int mask = xas->xa_sibs;
+
+ /* XXX: no support for splitting really large entries yet */
+ if (WARN_ON(xas->xa_shift + 2 * XA_CHUNK_SHIFT < order))
+ goto nomem;
+ if (xas->xa_shift + XA_CHUNK_SHIFT > order)
+ return;
+
+ do {
+ unsigned int i;
+ void *sibling;
+ struct xa_node *node;
+
+ node = kmem_cache_alloc(radix_tree_node_cachep, gfp);
+ if (!node)
+ goto nomem;
+ node->array = xas->xa;
+ for (i = 0; i < XA_CHUNK_SIZE; i++) {
+ if ((i & mask) == 0) {
+ RCU_INIT_POINTER(node->slots[i], entry);
+ sibling = xa_mk_sibling(0);
+ } else {
+ RCU_INIT_POINTER(node->slots[i], sibling);
+ }
+ }
+ RCU_INIT_POINTER(node->parent, xas->xa_alloc);
+ xas->xa_alloc = node;
+ } while (sibs-- > 0);
+
+ return;
+nomem:
+ xas_destroy(xas);
+ xas_set_err(xas, -ENOMEM);
+}
+EXPORT_SYMBOL_GPL(xas_split_alloc);
+
+/**
+ * xas_split() - Split a multi-index entry into smaller entries.
+ * @xas: XArray operation state.
+ * @entry: New entry to store in the array.
+ * @order: New entry order.
+ *
+ * The value in the entry is copied to all the replacement entries.
+ *
+ * Context: Any context. The caller should hold the xa_lock.
+ */
+void xas_split(struct xa_state *xas, void *entry, unsigned int order)
+{
+ unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1;
+ unsigned int offset, marks;
+ struct xa_node *node;
+ void *curr = xas_load(xas);
+ int values = 0;
+
+ node = xas->xa_node;
+ if (xas_top(node))
+ return;
+
+ marks = node_get_marks(node, xas->xa_offset);
+
+ offset = xas->xa_offset + sibs;
+ do {
+ if (xas->xa_shift < node->shift) {
+ struct xa_node *child = xas->xa_alloc;
+
+ xas->xa_alloc = rcu_dereference_raw(child->parent);
+ child->shift = node->shift - XA_CHUNK_SHIFT;
+ child->offset = offset;
+ child->count = XA_CHUNK_SIZE;
+ child->nr_values = xa_is_value(entry) ?
+ XA_CHUNK_SIZE : 0;
+ RCU_INIT_POINTER(child->parent, node);
+ node_set_marks(node, offset, child, marks);
+ rcu_assign_pointer(node->slots[offset],
+ xa_mk_node(child));
+ if (xa_is_value(curr))
+ values--;
+ } else {
+ unsigned int canon = offset - xas->xa_sibs;
+
+ node_set_marks(node, canon, NULL, marks);
+ rcu_assign_pointer(node->slots[canon], entry);
+ while (offset > canon)
+ rcu_assign_pointer(node->slots[offset--],
+ xa_mk_sibling(canon));
+ values += (xa_is_value(entry) - xa_is_value(curr)) *
+ (xas->xa_sibs + 1);
+ }
+ } while (offset-- > xas->xa_offset);
+
+ node->nr_values += values;
+}
+EXPORT_SYMBOL_GPL(xas_split);
+#endif
+
/**
* xas_pause() - Pause a walk to drop a lock.
* @xas: XArray operation state.
@@ -1407,7 +1557,7 @@ EXPORT_SYMBOL(__xa_store);
* @gfp: Memory allocation flags.
*
* After this function returns, loads from this index will return @entry.
- * Storing into an existing multislot entry updates the entry of every index.
+ * Storing into an existing multi-index entry updates the entry of every index.
* The marks associated with @index are unaffected unless @entry is %NULL.
*
* Context: Any context. Takes and releases the xa_lock.
@@ -1549,7 +1699,7 @@ static void xas_set_range(struct xa_state *xas, unsigned long first,
*
* After this function returns, loads from any index between @first and @last,
* inclusive will return @entry.
- * Storing into an existing multislot entry updates the entry of every index.
+ * Storing into an existing multi-index entry updates the entry of every index.
* The marks associated with @index are unaffected unless @entry is %NULL.
*
* Context: Process context. Takes and releases the xa_lock. May sleep
@@ -1592,6 +1742,46 @@ unlock:
return xas_result(&xas, NULL);
}
EXPORT_SYMBOL(xa_store_range);
+
+/**
+ * xa_get_order() - Get the order of an entry.
+ * @xa: XArray.
+ * @index: Index of the entry.
+ *
+ * Return: A number between 0 and 63 indicating the order of the entry.
+ */
+int xa_get_order(struct xarray *xa, unsigned long index)
+{
+ XA_STATE(xas, xa, index);
+ void *entry;
+ int order = 0;
+
+ rcu_read_lock();
+ entry = xas_load(&xas);
+
+ if (!entry)
+ goto unlock;
+
+ if (!xas.xa_node)
+ goto unlock;
+
+ for (;;) {
+ unsigned int slot = xas.xa_offset + (1 << order);
+
+ if (slot >= XA_CHUNK_SIZE)
+ break;
+ if (!xa_is_sibling(xas.xa_node->slots[slot]))
+ break;
+ order++;
+ }
+
+ order += xas.xa_node->shift;
+unlock:
+ rcu_read_unlock();
+
+ return order;
+}
+EXPORT_SYMBOL(xa_get_order);
#endif /* CONFIG_XARRAY_MULTI */
/**
@@ -1974,6 +2164,29 @@ unsigned int xa_extract(struct xarray *xa, void **dst, unsigned long start,
EXPORT_SYMBOL(xa_extract);
/**
+ * xa_delete_node() - Private interface for workingset code.
+ * @node: Node to be removed from the tree.
+ * @update: Function to call to update ancestor nodes.
+ *
+ * Context: xa_lock must be held on entry and will not be released.
+ */
+void xa_delete_node(struct xa_node *node, xa_update_node_t update)
+{
+ struct xa_state xas = {
+ .xa = node->array,
+ .xa_index = (unsigned long)node->offset <<
+ (node->shift + XA_CHUNK_SHIFT),
+ .xa_shift = node->shift + XA_CHUNK_SHIFT,
+ .xa_offset = node->offset,
+ .xa_node = xa_parent_locked(node->array, node),
+ .xa_update = update,
+ };
+
+ xas_store(&xas, NULL);
+}
+EXPORT_SYMBOL_GPL(xa_delete_node); /* For the benefit of the test suite */
+
+/**
* xa_destroy() - Free all internal data structures.
* @xa: XArray.
*
diff --git a/mm/Kconfig b/mm/Kconfig
index e72e61c1d62e..d42423f884a7 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -152,6 +152,7 @@ config HAVE_BOOTMEM_INFO_NODE
# eventually, we can have this option just 'select SPARSEMEM'
config MEMORY_HOTPLUG
bool "Allow for memory hot-add"
+ select MEMORY_ISOLATION
depends on SPARSEMEM || X86_64_ACPI_NUMA
depends on ARCH_ENABLE_MEMORY_HOTPLUG
depends on 64BIT || BROKEN
@@ -178,7 +179,6 @@ config MEMORY_HOTPLUG_DEFAULT_ONLINE
config MEMORY_HOTREMOVE
bool "Allow for memory hot remove"
- select MEMORY_ISOLATION
select HAVE_BOOTMEM_INFO_NODE if (X86_64 || PPC64)
depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE
depends on MIGRATION
@@ -816,6 +816,9 @@ config DEVICE_PRIVATE
memory; i.e., memory that is only accessible from the device (or
group of devices). You likely also want to select HMM_MIRROR.
+config VMAP_PFN
+ bool
+
config FRAME_VECTOR
bool
diff --git a/mm/compaction.c b/mm/compaction.c
index 6c63844fc061..6e0ee5641788 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -625,7 +625,7 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
}
/* Found a free page, will break it into order-0 pages */
- order = page_order(page);
+ order = buddy_order(page);
isolated = __isolate_free_page(page, order);
if (!isolated)
break;
@@ -898,7 +898,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
* potential isolation targets.
*/
if (PageBuddy(page)) {
- unsigned long freepage_order = page_order_unsafe(page);
+ unsigned long freepage_order = buddy_order_unsafe(page);
/*
* Without lock, we cannot be sure that what we got is
@@ -1172,7 +1172,7 @@ static bool suitable_migration_target(struct compact_control *cc,
* the only small danger is that we skip a potentially suitable
* pageblock, so it's not worth to check order for valid range.
*/
- if (page_order_unsafe(page) >= pageblock_order)
+ if (buddy_order_unsafe(page) >= pageblock_order)
return false;
}
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 086309fb9b6f..c05d9dcf7891 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -28,6 +28,7 @@
#include <linux/swapops.h>
#include <linux/start_kernel.h>
#include <linux/sched/mm.h>
+#include <linux/io.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
@@ -44,10 +45,17 @@
* entry type. But these bits might affect the ability to clear entries with
* pxx_clear() because of how dynamic page table folding works on s390. So
* while loading up the entries do not change the lower 4 bits. It does not
- * have affect any other platform.
+ * have affect any other platform. Also avoid the 62nd bit on ppc64 that is
+ * used to mark a pte entry.
*/
-#define S390_MASK_BITS 4
-#define RANDOM_ORVALUE GENMASK(BITS_PER_LONG - 1, S390_MASK_BITS)
+#define S390_SKIP_MASK GENMASK(3, 0)
+#if __BITS_PER_LONG == 64
+#define PPC64_SKIP_MASK GENMASK(62, 62)
+#else
+#define PPC64_SKIP_MASK 0x0
+#endif
+#define ARCH_SKIP_MASK (S390_SKIP_MASK | PPC64_SKIP_MASK)
+#define RANDOM_ORVALUE (GENMASK(BITS_PER_LONG - 1, 0) & ~ARCH_SKIP_MASK)
#define RANDOM_NZVALUE GENMASK(7, 0)
static void __init pte_basic_tests(unsigned long pfn, pgprot_t prot)
@@ -71,15 +79,18 @@ static void __init pte_advanced_tests(struct mm_struct *mm,
{
pte_t pte = pfn_pte(pfn, prot);
+ /*
+ * Architectures optimize set_pte_at by avoiding TLB flush.
+ * This requires set_pte_at to be not used to update an
+ * existing pte entry. Clear pte before we do set_pte_at
+ */
+
pr_debug("Validating PTE advanced\n");
pte = pfn_pte(pfn, prot);
set_pte_at(mm, vaddr, ptep, pte);
ptep_set_wrprotect(mm, vaddr, ptep);
pte = ptep_get(ptep);
WARN_ON(pte_write(pte));
-
- pte = pfn_pte(pfn, prot);
- set_pte_at(mm, vaddr, ptep, pte);
ptep_get_and_clear(mm, vaddr, ptep);
pte = ptep_get(ptep);
WARN_ON(!pte_none(pte));
@@ -93,13 +104,11 @@ static void __init pte_advanced_tests(struct mm_struct *mm,
ptep_set_access_flags(vma, vaddr, ptep, pte, 1);
pte = ptep_get(ptep);
WARN_ON(!(pte_write(pte) && pte_dirty(pte)));
-
- pte = pfn_pte(pfn, prot);
- set_pte_at(mm, vaddr, ptep, pte);
ptep_get_and_clear_full(mm, vaddr, ptep, 1);
pte = ptep_get(ptep);
WARN_ON(!pte_none(pte));
+ pte = pfn_pte(pfn, prot);
pte = pte_mkyoung(pte);
set_pte_at(mm, vaddr, ptep, pte);
ptep_test_and_clear_young(vma, vaddr, ptep);
@@ -111,10 +120,14 @@ static void __init pte_savedwrite_tests(unsigned long pfn, pgprot_t prot)
{
pte_t pte = pfn_pte(pfn, prot);
+ if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
+ return;
+
pr_debug("Validating PTE saved write\n");
WARN_ON(!pte_savedwrite(pte_mk_savedwrite(pte_clear_savedwrite(pte))));
WARN_ON(pte_savedwrite(pte_clear_savedwrite(pte_mk_savedwrite(pte))));
}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot)
{
@@ -141,7 +154,7 @@ static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot)
static void __init pmd_advanced_tests(struct mm_struct *mm,
struct vm_area_struct *vma, pmd_t *pmdp,
unsigned long pfn, unsigned long vaddr,
- pgprot_t prot)
+ pgprot_t prot, pgtable_t pgtable)
{
pmd_t pmd = pfn_pmd(pfn, prot);
@@ -152,14 +165,13 @@ static void __init pmd_advanced_tests(struct mm_struct *mm,
/* Align the address wrt HPAGE_PMD_SIZE */
vaddr = (vaddr & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE;
+ pgtable_trans_huge_deposit(mm, pmdp, pgtable);
+
pmd = pfn_pmd(pfn, prot);
set_pmd_at(mm, vaddr, pmdp, pmd);
pmdp_set_wrprotect(mm, vaddr, pmdp);
pmd = READ_ONCE(*pmdp);
WARN_ON(pmd_write(pmd));
-
- pmd = pfn_pmd(pfn, prot);
- set_pmd_at(mm, vaddr, pmdp, pmd);
pmdp_huge_get_and_clear(mm, vaddr, pmdp);
pmd = READ_ONCE(*pmdp);
WARN_ON(!pmd_none(pmd));
@@ -173,18 +185,20 @@ static void __init pmd_advanced_tests(struct mm_struct *mm,
pmdp_set_access_flags(vma, vaddr, pmdp, pmd, 1);
pmd = READ_ONCE(*pmdp);
WARN_ON(!(pmd_write(pmd) && pmd_dirty(pmd)));
-
- pmd = pmd_mkhuge(pfn_pmd(pfn, prot));
- set_pmd_at(mm, vaddr, pmdp, pmd);
pmdp_huge_get_and_clear_full(vma, vaddr, pmdp, 1);
pmd = READ_ONCE(*pmdp);
WARN_ON(!pmd_none(pmd));
+ pmd = pmd_mkhuge(pfn_pmd(pfn, prot));
pmd = pmd_mkyoung(pmd);
set_pmd_at(mm, vaddr, pmdp, pmd);
pmdp_test_and_clear_young(vma, vaddr, pmdp);
pmd = READ_ONCE(*pmdp);
WARN_ON(pmd_young(pmd));
+
+ /* Clear the pte entries */
+ pmdp_huge_get_and_clear(mm, vaddr, pmdp);
+ pgtable = pgtable_trans_huge_withdraw(mm, pmdp);
}
static void __init pmd_leaf_tests(unsigned long pfn, pgprot_t prot)
@@ -199,11 +213,12 @@ static void __init pmd_leaf_tests(unsigned long pfn, pgprot_t prot)
WARN_ON(!pmd_leaf(pmd));
}
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot)
{
pmd_t pmd;
- if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
+ if (!arch_ioremap_pmd_supported())
return;
pr_debug("Validating PMD huge\n");
@@ -217,11 +232,17 @@ static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot)
pmd = READ_ONCE(*pmdp);
WARN_ON(!pmd_none(pmd));
}
+#else /* CONFIG_HAVE_ARCH_HUGE_VMAP */
+static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot) { }
+#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
static void __init pmd_savedwrite_tests(unsigned long pfn, pgprot_t prot)
{
pmd_t pmd = pfn_pmd(pfn, prot);
+ if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
+ return;
+
pr_debug("Validating PMD saved write\n");
WARN_ON(!pmd_savedwrite(pmd_mk_savedwrite(pmd_clear_savedwrite(pmd))));
WARN_ON(pmd_savedwrite(pmd_clear_savedwrite(pmd_mk_savedwrite(pmd))));
@@ -272,17 +293,9 @@ static void __init pud_advanced_tests(struct mm_struct *mm,
WARN_ON(pud_write(pud));
#ifndef __PAGETABLE_PMD_FOLDED
- pud = pfn_pud(pfn, prot);
- set_pud_at(mm, vaddr, pudp, pud);
pudp_huge_get_and_clear(mm, vaddr, pudp);
pud = READ_ONCE(*pudp);
WARN_ON(!pud_none(pud));
-
- pud = pfn_pud(pfn, prot);
- set_pud_at(mm, vaddr, pudp, pud);
- pudp_huge_get_and_clear_full(mm, vaddr, pudp, 1);
- pud = READ_ONCE(*pudp);
- WARN_ON(!pud_none(pud));
#endif /* __PAGETABLE_PMD_FOLDED */
pud = pfn_pud(pfn, prot);
pud = pud_wrprotect(pud);
@@ -294,11 +307,20 @@ static void __init pud_advanced_tests(struct mm_struct *mm,
pud = READ_ONCE(*pudp);
WARN_ON(!(pud_write(pud) && pud_dirty(pud)));
+#ifndef __PAGETABLE_PMD_FOLDED
+ pudp_huge_get_and_clear_full(mm, vaddr, pudp, 1);
+ pud = READ_ONCE(*pudp);
+ WARN_ON(!pud_none(pud));
+#endif /* __PAGETABLE_PMD_FOLDED */
+
+ pud = pfn_pud(pfn, prot);
pud = pud_mkyoung(pud);
set_pud_at(mm, vaddr, pudp, pud);
pudp_test_and_clear_young(vma, vaddr, pudp);
pud = READ_ONCE(*pudp);
WARN_ON(pud_young(pud));
+
+ pudp_huge_get_and_clear(mm, vaddr, pudp);
}
static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot)
@@ -313,11 +335,12 @@ static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot)
WARN_ON(!pud_leaf(pud));
}
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot)
{
pud_t pud;
- if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
+ if (!arch_ioremap_pud_supported())
return;
pr_debug("Validating PUD huge\n");
@@ -331,6 +354,10 @@ static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot)
pud = READ_ONCE(*pudp);
WARN_ON(!pud_none(pud));
}
+#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
+static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot) { }
+#endif /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
+
#else /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
static void __init pud_basic_tests(unsigned long pfn, pgprot_t prot) { }
static void __init pud_advanced_tests(struct mm_struct *mm,
@@ -350,7 +377,7 @@ static void __init pud_basic_tests(unsigned long pfn, pgprot_t prot) { }
static void __init pmd_advanced_tests(struct mm_struct *mm,
struct vm_area_struct *vma, pmd_t *pmdp,
unsigned long pfn, unsigned long vaddr,
- pgprot_t prot)
+ pgprot_t prot, pgtable_t pgtable)
{
}
static void __init pud_advanced_tests(struct mm_struct *mm,
@@ -417,8 +444,6 @@ static void __init pud_populate_tests(struct mm_struct *mm, pud_t *pudp,
* This entry points to next level page table page.
* Hence this must not qualify as pud_bad().
*/
- pmd_clear(pmdp);
- pud_clear(pudp);
pud_populate(mm, pudp, pmdp);
pud = READ_ONCE(*pudp);
WARN_ON(pud_bad(pud));
@@ -515,12 +540,15 @@ static void __init pgd_populate_tests(struct mm_struct *mm, pgd_t *pgdp,
#endif /* PAGETABLE_P4D_FOLDED */
static void __init pte_clear_tests(struct mm_struct *mm, pte_t *ptep,
- unsigned long vaddr)
+ unsigned long pfn, unsigned long vaddr,
+ pgprot_t prot)
{
- pte_t pte = ptep_get(ptep);
+ pte_t pte = pfn_pte(pfn, prot);
pr_debug("Validating PTE clear\n");
+#ifndef CONFIG_RISCV
pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
+#endif
set_pte_at(mm, vaddr, ptep, pte);
barrier();
pte_clear(mm, vaddr, ptep);
@@ -550,7 +578,6 @@ static void __init pmd_populate_tests(struct mm_struct *mm, pmd_t *pmdp,
* This entry points to next level page table page.
* Hence this must not qualify as pmd_bad().
*/
- pmd_clear(pmdp);
pmd_populate(mm, pmdp, pgtable);
pmd = READ_ONCE(*pmdp);
WARN_ON(pmd_bad(pmd));
@@ -784,57 +811,8 @@ static void __init hugetlb_basic_tests(unsigned long pfn, pgprot_t prot)
WARN_ON(!pte_huge(pte_mkhuge(pte)));
#endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
}
-
-static void __init hugetlb_advanced_tests(struct mm_struct *mm,
- struct vm_area_struct *vma,
- pte_t *ptep, unsigned long pfn,
- unsigned long vaddr, pgprot_t prot)
-{
- struct page *page = pfn_to_page(pfn);
- pte_t pte = ptep_get(ptep);
- unsigned long paddr = __pfn_to_phys(pfn) & PMD_MASK;
-
- pr_debug("Validating HugeTLB advanced\n");
- pte = pte_mkhuge(mk_pte(pfn_to_page(PHYS_PFN(paddr)), prot));
- set_huge_pte_at(mm, vaddr, ptep, pte);
- barrier();
- WARN_ON(!pte_same(pte, huge_ptep_get(ptep)));
- huge_pte_clear(mm, vaddr, ptep, PMD_SIZE);
- pte = huge_ptep_get(ptep);
- WARN_ON(!huge_pte_none(pte));
-
- pte = mk_huge_pte(page, prot);
- set_huge_pte_at(mm, vaddr, ptep, pte);
- barrier();
- huge_ptep_set_wrprotect(mm, vaddr, ptep);
- pte = huge_ptep_get(ptep);
- WARN_ON(huge_pte_write(pte));
-
- pte = mk_huge_pte(page, prot);
- set_huge_pte_at(mm, vaddr, ptep, pte);
- barrier();
- huge_ptep_get_and_clear(mm, vaddr, ptep);
- pte = huge_ptep_get(ptep);
- WARN_ON(!huge_pte_none(pte));
-
- pte = mk_huge_pte(page, prot);
- pte = huge_pte_wrprotect(pte);
- set_huge_pte_at(mm, vaddr, ptep, pte);
- barrier();
- pte = huge_pte_mkwrite(pte);
- pte = huge_pte_mkdirty(pte);
- huge_ptep_set_access_flags(vma, vaddr, ptep, pte, 1);
- pte = huge_ptep_get(ptep);
- WARN_ON(!(huge_pte_write(pte) && huge_pte_dirty(pte)));
-}
#else /* !CONFIG_HUGETLB_PAGE */
static void __init hugetlb_basic_tests(unsigned long pfn, pgprot_t prot) { }
-static void __init hugetlb_advanced_tests(struct mm_struct *mm,
- struct vm_area_struct *vma,
- pte_t *ptep, unsigned long pfn,
- unsigned long vaddr, pgprot_t prot)
-{
-}
#endif /* CONFIG_HUGETLB_PAGE */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -965,7 +943,13 @@ static int __init debug_vm_pgtable(void)
p4dp = p4d_alloc(mm, pgdp, vaddr);
pudp = pud_alloc(mm, p4dp, vaddr);
pmdp = pmd_alloc(mm, pudp, vaddr);
- ptep = pte_alloc_map_lock(mm, pmdp, vaddr, &ptl);
+ /*
+ * Allocate pgtable_t
+ */
+ if (pte_alloc(mm, pmdp)) {
+ pr_err("pgtable allocation failed\n");
+ return 1;
+ }
/*
* Save all the page table page addresses as the page table
@@ -985,32 +969,11 @@ static int __init debug_vm_pgtable(void)
p4d_basic_tests(p4d_aligned, prot);
pgd_basic_tests(pgd_aligned, prot);
- pte_clear_tests(mm, ptep, vaddr);
- pmd_clear_tests(mm, pmdp);
- pud_clear_tests(mm, pudp);
- p4d_clear_tests(mm, p4dp);
- pgd_clear_tests(mm, pgdp);
-
- pte_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
- pmd_advanced_tests(mm, vma, pmdp, pmd_aligned, vaddr, prot);
- pud_advanced_tests(mm, vma, pudp, pud_aligned, vaddr, prot);
- hugetlb_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
-
pmd_leaf_tests(pmd_aligned, prot);
pud_leaf_tests(pud_aligned, prot);
- pmd_huge_tests(pmdp, pmd_aligned, prot);
- pud_huge_tests(pudp, pud_aligned, prot);
-
- pte_savedwrite_tests(pte_aligned, prot);
- pmd_savedwrite_tests(pmd_aligned, prot);
-
- pte_unmap_unlock(ptep, ptl);
-
- pmd_populate_tests(mm, pmdp, saved_ptep);
- pud_populate_tests(mm, pudp, saved_pmdp);
- p4d_populate_tests(mm, p4dp, saved_pudp);
- pgd_populate_tests(mm, pgdp, saved_p4dp);
+ pte_savedwrite_tests(pte_aligned, protnone);
+ pmd_savedwrite_tests(pmd_aligned, protnone);
pte_special_tests(pte_aligned, prot);
pte_protnone_tests(pte_aligned, protnone);
@@ -1029,11 +992,43 @@ static int __init debug_vm_pgtable(void)
pmd_swap_tests(pmd_aligned, prot);
swap_migration_tests();
- hugetlb_basic_tests(pte_aligned, prot);
pmd_thp_tests(pmd_aligned, prot);
pud_thp_tests(pud_aligned, prot);
+ hugetlb_basic_tests(pte_aligned, prot);
+
+ /*
+ * Page table modifying tests. They need to hold
+ * proper page table lock.
+ */
+
+ ptep = pte_offset_map_lock(mm, pmdp, vaddr, &ptl);
+ pte_clear_tests(mm, ptep, pte_aligned, vaddr, prot);
+ pte_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
+ pte_unmap_unlock(ptep, ptl);
+
+ ptl = pmd_lock(mm, pmdp);
+ pmd_clear_tests(mm, pmdp);
+ pmd_advanced_tests(mm, vma, pmdp, pmd_aligned, vaddr, prot, saved_ptep);
+ pmd_huge_tests(pmdp, pmd_aligned, prot);
+ pmd_populate_tests(mm, pmdp, saved_ptep);
+ spin_unlock(ptl);
+
+ ptl = pud_lock(mm, pudp);
+ pud_clear_tests(mm, pudp);
+ pud_advanced_tests(mm, vma, pudp, pud_aligned, vaddr, prot);
+ pud_huge_tests(pudp, pud_aligned, prot);
+ pud_populate_tests(mm, pudp, saved_pmdp);
+ spin_unlock(ptl);
+
+ spin_lock(&mm->page_table_lock);
+ p4d_clear_tests(mm, p4dp);
+ pgd_clear_tests(mm, pgdp);
+ p4d_populate_tests(mm, p4dp, saved_pudp);
+ pgd_populate_tests(mm, pgdp, saved_p4dp);
+ spin_unlock(&mm->page_table_lock);
+
p4d_free(mm, saved_p4dp);
pud_free(mm, saved_pudp);
pmd_free(mm, saved_pmdp);
diff --git a/mm/filemap.c b/mm/filemap.c
index e3b8987153e6..e4101b5bfa82 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -249,7 +249,7 @@ static void page_cache_free_page(struct address_space *mapping,
freepage(page);
if (PageTransHuge(page) && !PageHuge(page)) {
- page_ref_sub(page, HPAGE_PMD_NR);
+ page_ref_sub(page, thp_nr_pages(page));
VM_BUG_ON_PAGE(page_count(page) <= 0, page);
} else {
put_page(page);
@@ -829,13 +829,12 @@ EXPORT_SYMBOL_GPL(replace_page_cache_page);
noinline int __add_to_page_cache_locked(struct page *page,
struct address_space *mapping,
- pgoff_t offset, gfp_t gfp_mask,
+ pgoff_t offset, gfp_t gfp,
void **shadowp)
{
XA_STATE(xas, &mapping->i_pages, offset);
int huge = PageHuge(page);
int error;
- void *old;
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageSwapBacked(page), page);
@@ -846,25 +845,46 @@ noinline int __add_to_page_cache_locked(struct page *page,
page->index = offset;
if (!huge) {
- error = mem_cgroup_charge(page, current->mm, gfp_mask);
+ error = mem_cgroup_charge(page, current->mm, gfp);
if (error)
goto error;
}
+ gfp &= GFP_RECLAIM_MASK;
+
do {
+ unsigned int order = xa_get_order(xas.xa, xas.xa_index);
+ void *entry, *old = NULL;
+
+ if (order > thp_order(page))
+ xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index),
+ order, gfp);
xas_lock_irq(&xas);
- old = xas_load(&xas);
- if (old && !xa_is_value(old))
- xas_set_err(&xas, -EEXIST);
+ xas_for_each_conflict(&xas, entry) {
+ old = entry;
+ if (!xa_is_value(entry)) {
+ xas_set_err(&xas, -EEXIST);
+ goto unlock;
+ }
+ }
+
+ if (old) {
+ if (shadowp)
+ *shadowp = old;
+ /* entry may have been split before we acquired lock */
+ order = xa_get_order(xas.xa, xas.xa_index);
+ if (order > thp_order(page)) {
+ xas_split(&xas, old, order);
+ xas_reset(&xas);
+ }
+ }
+
xas_store(&xas, page);
if (xas_error(&xas))
goto unlock;
- if (xa_is_value(old)) {
+ if (old)
mapping->nrexceptional--;
- if (shadowp)
- *shadowp = old;
- }
mapping->nrpages++;
/* hugetlb pages do not participate in page cache accounting */
@@ -872,7 +892,7 @@ noinline int __add_to_page_cache_locked(struct page *page,
__inc_lruvec_page_state(page, NR_FILE_PAGES);
unlock:
xas_unlock_irq(&xas);
- } while (xas_nomem(&xas, gfp_mask & GFP_RECLAIM_MASK));
+ } while (xas_nomem(&xas, gfp));
if (xas_error(&xas)) {
error = xas_error(&xas);
@@ -1425,7 +1445,7 @@ static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem
* unlock_page - unlock a locked page
* @page: the page
*
- * Unlocks the page and wakes up sleepers in ___wait_on_page_locked().
+ * Unlocks the page and wakes up sleepers in wait_on_page_locked().
* Also wakes sleepers in wait_on_page_writeback() because the wakeup
* mechanism between PageLocked pages and PageWriteback pages is shared.
* But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
@@ -2179,6 +2199,14 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
offset = *ppos & ~PAGE_MASK;
+ /*
+ * If we've already successfully copied some data, then we
+ * can no longer safely return -EIOCBQUEUED. Hence mark
+ * an async read NOWAIT at that point.
+ */
+ if (written && (iocb->ki_flags & IOCB_WAITQ))
+ iocb->ki_flags |= IOCB_NOWAIT;
+
for (;;) {
struct page *page;
pgoff_t end_index;
@@ -2568,8 +2596,8 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
struct file *file = vmf->vma->vm_file;
struct file_ra_state *ra = &file->f_ra;
struct address_space *mapping = file->f_mapping;
+ DEFINE_READAHEAD(ractl, file, mapping, vmf->pgoff);
struct file *fpin = NULL;
- pgoff_t offset = vmf->pgoff;
unsigned int mmap_miss;
/* If we don't want any read-ahead, don't bother */
@@ -2580,8 +2608,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
if (vmf->vma->vm_flags & VM_SEQ_READ) {
fpin = maybe_unlock_mmap_for_io(vmf, fpin);
- page_cache_sync_readahead(mapping, ra, file, offset,
- ra->ra_pages);
+ page_cache_sync_ra(&ractl, ra, ra->ra_pages);
return fpin;
}
@@ -2601,10 +2628,11 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
* mmap read-around
*/
fpin = maybe_unlock_mmap_for_io(vmf, fpin);
- ra->start = max_t(long, 0, offset - ra->ra_pages / 2);
+ ra->start = max_t(long, 0, vmf->pgoff - ra->ra_pages / 2);
ra->size = ra->ra_pages;
ra->async_size = ra->ra_pages / 4;
- ra_submit(ra, mapping, file);
+ ractl._index = ra->start;
+ do_page_cache_ra(&ractl, ra->size, ra->async_size);
return fpin;
}
@@ -2984,7 +3012,7 @@ filler:
goto out;
/*
- * Page is not up to date and may be locked due one of the following
+ * Page is not up to date and may be locked due to one of the following
* case a: Page is being filled and the page lock is held
* case b: Read/write error clearing the page uptodate status
* case c: Truncation in progress (page locked)
diff --git a/mm/gup.c b/mm/gup.c
index ad617e7f22f5..102877ed77a4 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1490,35 +1490,6 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
mmap_read_unlock(mm);
return ret; /* 0 or negative error code */
}
-
-/**
- * get_dump_page() - pin user page in memory while writing it to core dump
- * @addr: user address
- *
- * Returns struct page pointer of user page pinned for dump,
- * to be freed afterwards by put_page().
- *
- * Returns NULL on any kind of failure - a hole must then be inserted into
- * the corefile, to preserve alignment with its headers; and also returns
- * NULL wherever the ZERO_PAGE, or an anonymous pte_none, has been found -
- * allowing a hole to be left in the corefile to save diskspace.
- *
- * Called without mmap_lock, but after all other threads have been killed.
- */
-#ifdef CONFIG_ELF_CORE
-struct page *get_dump_page(unsigned long addr)
-{
- struct vm_area_struct *vma;
- struct page *page;
-
- if (__get_user_pages(current->mm, addr, 1,
- FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma,
- NULL) < 1)
- return NULL;
- flush_cache_page(vma, addr, page_to_pfn(page));
- return page;
-}
-#endif /* CONFIG_ELF_CORE */
#else /* CONFIG_MMU */
static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start,
unsigned long nr_pages, struct page **pages,
@@ -1564,6 +1535,38 @@ finish_or_fault:
}
#endif /* !CONFIG_MMU */
+/**
+ * get_dump_page() - pin user page in memory while writing it to core dump
+ * @addr: user address
+ *
+ * Returns struct page pointer of user page pinned for dump,
+ * to be freed afterwards by put_page().
+ *
+ * Returns NULL on any kind of failure - a hole must then be inserted into
+ * the corefile, to preserve alignment with its headers; and also returns
+ * NULL wherever the ZERO_PAGE, or an anonymous pte_none, has been found -
+ * allowing a hole to be left in the corefile to save diskspace.
+ *
+ * Called without mmap_lock (takes and releases the mmap_lock by itself).
+ */
+#ifdef CONFIG_ELF_CORE
+struct page *get_dump_page(unsigned long addr)
+{
+ struct mm_struct *mm = current->mm;
+ struct page *page;
+ int locked = 1;
+ int ret;
+
+ if (mmap_read_lock_killable(mm))
+ return NULL;
+ ret = __get_user_pages_locked(mm, addr, 1, &page, NULL, &locked,
+ FOLL_FORCE | FOLL_DUMP | FOLL_GET);
+ if (locked)
+ mmap_read_unlock(mm);
+ return (ret == 1) ? page : NULL;
+}
+#endif /* CONFIG_ELF_CORE */
+
#if defined(CONFIG_FS_DAX) || defined (CONFIG_CMA)
static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
{
diff --git a/mm/gup_benchmark.c b/mm/gup_benchmark.c
index 464cae1fa3ea..8b3e5b5cd8fa 100644
--- a/mm/gup_benchmark.c
+++ b/mm/gup_benchmark.c
@@ -72,6 +72,8 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
int nr;
struct page **pages;
int ret = 0;
+ bool needs_mmap_lock =
+ cmd != GUP_FAST_BENCHMARK && cmd != PIN_FAST_BENCHMARK;
if (gup->size > ULONG_MAX)
return -EINVAL;
@@ -81,6 +83,11 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
if (!pages)
return -ENOMEM;
+ if (needs_mmap_lock && mmap_read_lock_killable(current->mm)) {
+ ret = -EINTR;
+ goto free_pages;
+ }
+
i = 0;
nr = gup->nr_pages_per_call;
start_time = ktime_get();
@@ -120,9 +127,8 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
pages + i, NULL);
break;
default:
- kvfree(pages);
ret = -EINVAL;
- goto out;
+ goto unlock;
}
if (nr <= 0)
@@ -150,8 +156,11 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
end_time = ktime_get();
gup->put_delta_usec = ktime_us_delta(end_time, start_time);
+unlock:
+ if (needs_mmap_lock)
+ mmap_read_unlock(current->mm);
+free_pages:
kvfree(pages);
-out:
return ret;
}
diff --git a/mm/highmem.c b/mm/highmem.c
index 64d8dea47dd1..1352a27951e3 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -369,7 +369,7 @@ void kunmap_high(struct page *page)
}
EXPORT_SYMBOL(kunmap_high);
-#endif
+#endif /* CONFIG_HIGHMEM */
#if defined(HASHED_PAGE_VIRTUAL)
@@ -481,4 +481,4 @@ void __init page_address_init(void)
}
}
-#endif /* defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) */
+#endif /* defined(HASHED_PAGE_VIRTUAL) */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 65c289c13b58..9474dbc150ed 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2335,13 +2335,13 @@ static void unmap_page(struct page *page)
VM_BUG_ON_PAGE(!unmap_success, page);
}
-static void remap_page(struct page *page)
+static void remap_page(struct page *page, unsigned int nr)
{
int i;
if (PageTransHuge(page)) {
remove_migration_ptes(page, page, true);
} else {
- for (i = 0; i < HPAGE_PMD_NR; i++)
+ for (i = 0; i < nr; i++)
remove_migration_ptes(page + i, page + i, true);
}
}
@@ -2419,6 +2419,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
struct lruvec *lruvec;
struct address_space *swap_cache = NULL;
unsigned long offset = 0;
+ unsigned int nr = thp_nr_pages(head);
int i;
lruvec = mem_cgroup_page_lruvec(head, pgdat);
@@ -2434,7 +2435,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
xa_lock(&swap_cache->i_pages);
}
- for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
+ for (i = nr - 1; i >= 1; i--) {
__split_huge_page_tail(head, i, lruvec, list);
/* Some pages can be beyond i_size: drop them from page cache */
if (head[i].index >= end) {
@@ -2454,7 +2455,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
ClearPageCompound(head);
- split_page_owner(head, HPAGE_PMD_ORDER);
+ split_page_owner(head, nr);
/* See comment in __split_huge_page_tail() */
if (PageAnon(head)) {
@@ -2473,9 +2474,15 @@ static void __split_huge_page(struct page *page, struct list_head *list,
spin_unlock_irqrestore(&pgdat->lru_lock, flags);
- remap_page(head);
+ remap_page(head, nr);
- for (i = 0; i < HPAGE_PMD_NR; i++) {
+ if (PageSwapCache(head)) {
+ swp_entry_t entry = { .val = page_private(head) };
+
+ split_swap_cluster(entry);
+ }
+
+ for (i = 0; i < nr; i++) {
struct page *subpage = head + i;
if (subpage == page)
continue;
@@ -2494,7 +2501,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
int total_mapcount(struct page *page)
{
- int i, compound, ret;
+ int i, compound, nr, ret;
VM_BUG_ON_PAGE(PageTail(page), page);
@@ -2502,16 +2509,17 @@ int total_mapcount(struct page *page)
return atomic_read(&page->_mapcount) + 1;
compound = compound_mapcount(page);
+ nr = compound_nr(page);
if (PageHuge(page))
return compound;
ret = compound;
- for (i = 0; i < HPAGE_PMD_NR; i++)
+ for (i = 0; i < nr; i++)
ret += atomic_read(&page[i]._mapcount) + 1;
/* File pages has compound_mapcount included in _mapcount */
if (!PageAnon(page))
- return ret - compound * HPAGE_PMD_NR;
+ return ret - compound * nr;
if (PageDoubleMap(page))
- ret -= HPAGE_PMD_NR;
+ ret -= nr;
return ret;
}
@@ -2556,14 +2564,14 @@ int page_trans_huge_mapcount(struct page *page, int *total_mapcount)
page = compound_head(page);
_total_mapcount = ret = 0;
- for (i = 0; i < HPAGE_PMD_NR; i++) {
+ for (i = 0; i < thp_nr_pages(page); i++) {
mapcount = atomic_read(&page[i]._mapcount) + 1;
ret = max(ret, mapcount);
_total_mapcount += mapcount;
}
if (PageDoubleMap(page)) {
ret -= 1;
- _total_mapcount -= HPAGE_PMD_NR;
+ _total_mapcount -= thp_nr_pages(page);
}
mapcount = compound_mapcount(page);
ret += mapcount;
@@ -2580,9 +2588,9 @@ bool can_split_huge_page(struct page *page, int *pextra_pins)
/* Additional pins from page cache */
if (PageAnon(page))
- extra_pins = PageSwapCache(page) ? HPAGE_PMD_NR : 0;
+ extra_pins = PageSwapCache(page) ? thp_nr_pages(page) : 0;
else
- extra_pins = HPAGE_PMD_NR;
+ extra_pins = thp_nr_pages(page);
if (pextra_pins)
*pextra_pins = extra_pins;
return total_mapcount(page) == page_count(page) - extra_pins - 1;
@@ -2709,12 +2717,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
}
__split_huge_page(page, list, end, flags);
- if (PageSwapCache(head)) {
- swp_entry_t entry = { .val = page_private(head) };
-
- ret = split_swap_cluster(entry);
- } else
- ret = 0;
+ ret = 0;
} else {
if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
pr_alert("total_mapcount: %u, page_count(): %u\n",
@@ -2728,7 +2731,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
fail: if (mapping)
xa_unlock(&mapping->i_pages);
spin_unlock_irqrestore(&pgdata->lru_lock, flags);
- remap_page(head);
+ remap_page(head, thp_nr_pages(head));
ret = -EBUSY;
}
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index e488876b168a..1ae1ebc2b9b1 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -26,11 +26,6 @@ static int hwpoison_inject(void *data, u64 val)
p = pfn_to_page(pfn);
hpage = compound_head(p);
- /*
- * This implies unable to support free buddy pages.
- */
- if (!get_hwpoison_page(p))
- return 0;
if (!hwpoison_filter_enable)
goto inject;
@@ -40,23 +35,20 @@ static int hwpoison_inject(void *data, u64 val)
* This implies unable to support non-LRU pages.
*/
if (!PageLRU(hpage) && !PageHuge(p))
- goto put_out;
+ return 0;
/*
- * do a racy check with elevated page count, to make sure PG_hwpoison
- * will only be set for the targeted owner (or on a free page).
+ * do a racy check to make sure PG_hwpoison will only be set for
+ * the targeted owner (or on a free page).
* memory_failure() will redo the check reliably inside page lock.
*/
err = hwpoison_filter(hpage);
if (err)
- goto put_out;
+ return 0;
inject:
pr_info("Injecting memory failure at pfn %#lx\n", pfn);
- return memory_failure(pfn, MF_COUNT_INCREASED);
-put_out:
- put_hwpoison_page(p);
- return 0;
+ return memory_failure(pfn, 0);
}
static int hwpoison_unpoison(void *data, u64 val)
diff --git a/mm/internal.h b/mm/internal.h
index a801a4d51f26..c43ccdddb0f6 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -49,20 +49,15 @@ void unmap_page_range(struct mmu_gather *tlb,
unsigned long addr, unsigned long end,
struct zap_details *details);
-void force_page_cache_readahead(struct address_space *, struct file *,
- pgoff_t index, unsigned long nr_to_read);
-void __do_page_cache_readahead(struct address_space *, struct file *,
- pgoff_t index, unsigned long nr_to_read,
+void do_page_cache_ra(struct readahead_control *, unsigned long nr_to_read,
unsigned long lookahead_size);
-
-/*
- * Submit IO for the read-ahead request in file_ra_state.
- */
-static inline void ra_submit(struct file_ra_state *ra,
- struct address_space *mapping, struct file *filp)
+void force_page_cache_ra(struct readahead_control *, struct file_ra_state *,
+ unsigned long nr);
+static inline void force_page_cache_readahead(struct address_space *mapping,
+ struct file *file, pgoff_t index, unsigned long nr_to_read)
{
- __do_page_cache_readahead(mapping, filp,
- ra->start, ra->size, ra->async_size);
+ DEFINE_READAHEAD(ractl, file, mapping, index);
+ force_page_cache_ra(&ractl, &file->f_ra, nr_to_read);
}
struct page *find_get_entry(struct address_space *mapping, pgoff_t index);
@@ -275,16 +270,16 @@ int find_suitable_fallback(struct free_area *area, unsigned int order,
* page from being allocated in parallel and returning garbage as the order.
* If a caller does not hold page_zone(page)->lock, it must guarantee that the
* page cannot be allocated or merged in parallel. Alternatively, it must
- * handle invalid values gracefully, and use page_order_unsafe() below.
+ * handle invalid values gracefully, and use buddy_order_unsafe() below.
*/
-static inline unsigned int page_order(struct page *page)
+static inline unsigned int buddy_order(struct page *page)
{
/* PageBuddy() must be checked by the caller */
return page_private(page);
}
/*
- * Like page_order(), but for callers who cannot afford to hold the zone lock.
+ * Like buddy_order(), but for callers who cannot afford to hold the zone lock.
* PageBuddy() should be checked first by the caller to minimize race window,
* and invalid values must be handled gracefully.
*
@@ -294,7 +289,7 @@ static inline unsigned int page_order(struct page *page)
* times, potentially observing different values in the tests and the actual
* use of the result.
*/
-#define page_order_unsafe(page) READ_ONCE(page_private(page))
+#define buddy_order_unsafe(page) READ_ONCE(page_private(page))
static inline bool is_cow_mapping(vm_flags_t flags)
{
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 58b0d9c502a1..4e3dff13eb70 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -434,7 +434,7 @@ static void insert_to_mm_slots_hash(struct mm_struct *mm,
static inline int khugepaged_test_exit(struct mm_struct *mm)
{
- return atomic_read(&mm->mm_users) == 0 || !mmget_still_valid(mm);
+ return atomic_read(&mm->mm_users) == 0;
}
static bool hugepage_vma_check(struct vm_area_struct *vma,
diff --git a/mm/ksm.c b/mm/ksm.c
index 9afccc36dbd2..0960750bb316 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -81,7 +81,7 @@
* different KSM page copy of that content
*
* Internally, the regular nodes, "dups" and "chains" are represented
- * using the same :c:type:`struct stable_node` structure.
+ * using the same struct stable_node structure.
*
* In addition to the stable tree, KSM uses a second data structure called the
* unstable tree: this tree holds pointers to pages which have been found to
diff --git a/mm/madvise.c b/mm/madvise.c
index 9b065d412e5f..416a56b8e757 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -17,6 +17,8 @@
#include <linux/falloc.h>
#include <linux/fadvise.h>
#include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/uio.h>
#include <linux/ksm.h>
#include <linux/fs.h>
#include <linux/file.h>
@@ -27,7 +29,6 @@
#include <linux/swapops.h>
#include <linux/shmem_fs.h>
#include <linux/mmu_notifier.h>
-#include <linux/sched/mm.h>
#include <asm/tlb.h>
@@ -258,6 +259,7 @@ static long madvise_willneed(struct vm_area_struct *vma,
struct vm_area_struct **prev,
unsigned long start, unsigned long end)
{
+ struct mm_struct *mm = vma->vm_mm;
struct file *file = vma->vm_file;
loff_t offset;
@@ -294,10 +296,10 @@ static long madvise_willneed(struct vm_area_struct *vma,
get_file(file);
offset = (loff_t)(start - vma->vm_start)
+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
- mmap_read_unlock(current->mm);
+ mmap_read_unlock(mm);
vfs_fadvise(file, offset, end - start, POSIX_FADV_WILLNEED);
fput(file);
- mmap_read_lock(current->mm);
+ mmap_read_lock(mm);
return 0;
}
@@ -766,6 +768,8 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
int behavior)
{
+ struct mm_struct *mm = vma->vm_mm;
+
*prev = vma;
if (!can_madv_lru_vma(vma))
return -EINVAL;
@@ -773,8 +777,8 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
if (!userfaultfd_remove(vma, start, end)) {
*prev = NULL; /* mmap_lock has been dropped, prev is stale */
- mmap_read_lock(current->mm);
- vma = find_vma(current->mm, start);
+ mmap_read_lock(mm);
+ vma = find_vma(mm, start);
if (!vma)
return -ENOMEM;
if (start < vma->vm_start) {
@@ -828,6 +832,7 @@ static long madvise_remove(struct vm_area_struct *vma,
loff_t offset;
int error;
struct file *f;
+ struct mm_struct *mm = vma->vm_mm;
*prev = NULL; /* tell sys_madvise we drop mmap_lock */
@@ -855,13 +860,13 @@ static long madvise_remove(struct vm_area_struct *vma,
get_file(f);
if (userfaultfd_remove(vma, start, end)) {
/* mmap_lock was not released by userfaultfd_remove() */
- mmap_read_unlock(current->mm);
+ mmap_read_unlock(mm);
}
error = vfs_fallocate(f,
FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
offset, end - start);
fput(f);
- mmap_read_lock(current->mm);
+ mmap_read_lock(mm);
return error;
}
@@ -872,7 +877,6 @@ static long madvise_remove(struct vm_area_struct *vma,
static int madvise_inject_error(int behavior,
unsigned long start, unsigned long end)
{
- struct page *page;
struct zone *zone;
unsigned long size;
@@ -882,6 +886,7 @@ static int madvise_inject_error(int behavior,
for (; start < end; start += size) {
unsigned long pfn;
+ struct page *page;
int ret;
ret = get_user_pages_fast(start, 1, 0, &page);
@@ -896,32 +901,23 @@ static int madvise_inject_error(int behavior,
*/
size = page_size(compound_head(page));
- if (PageHWPoison(page)) {
- put_page(page);
- continue;
- }
-
if (behavior == MADV_SOFT_OFFLINE) {
pr_info("Soft offlining pfn %#lx at process virtual address %#lx\n",
- pfn, start);
-
+ pfn, start);
ret = soft_offline_page(pfn, MF_COUNT_INCREASED);
- if (ret)
- return ret;
- continue;
+ } else {
+ pr_info("Injecting memory failure for pfn %#lx at process virtual address %#lx\n",
+ pfn, start);
+ /*
+ * Drop the page reference taken by get_user_pages_fast(). In
+ * the absence of MF_COUNT_INCREASED the memory_failure()
+ * routine is responsible for pinning the page to prevent it
+ * from being released back to the page allocator.
+ */
+ put_page(page);
+ ret = memory_failure(pfn, 0);
}
- pr_info("Injecting memory failure for pfn %#lx at process virtual address %#lx\n",
- pfn, start);
-
- /*
- * Drop the page reference taken by get_user_pages_fast(). In
- * the absence of MF_COUNT_INCREASED the memory_failure()
- * routine is responsible for pinning the page to prevent it
- * from being released back to the page allocator.
- */
- put_page(page);
- ret = memory_failure(pfn, 0);
if (ret)
return ret;
}
@@ -993,6 +989,18 @@ madvise_behavior_valid(int behavior)
}
}
+static bool
+process_madvise_behavior_valid(int behavior)
+{
+ switch (behavior) {
+ case MADV_COLD:
+ case MADV_PAGEOUT:
+ return true;
+ default:
+ return false;
+ }
+}
+
/*
* The madvise(2) system call.
*
@@ -1040,6 +1048,11 @@ madvise_behavior_valid(int behavior)
* MADV_DONTDUMP - the application wants to prevent pages in the given range
* from being included in its core dump.
* MADV_DODUMP - cancel MADV_DONTDUMP: no longer exclude from core dump.
+ * MADV_COLD - the application is not expected to use this memory soon,
+ * deactivate pages in this range so that they can be reclaimed
+ * easily if memory pressure hanppens.
+ * MADV_PAGEOUT - the application is not expected to use this memory soon,
+ * page out the pages in this range immediately.
*
* return values:
* zero - success
@@ -1054,7 +1067,7 @@ madvise_behavior_valid(int behavior)
* -EBADF - map exists, but area maps something that isn't a file.
* -EAGAIN - a kernel resource was temporarily unavailable.
*/
-int do_madvise(unsigned long start, size_t len_in, int behavior)
+int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior)
{
unsigned long end, tmp;
struct vm_area_struct *vma, *prev;
@@ -1092,27 +1105,10 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
write = madvise_need_mmap_write(behavior);
if (write) {
- if (mmap_write_lock_killable(current->mm))
- return -EINTR;
-
- /*
- * We may have stolen the mm from another process
- * that is undergoing core dumping.
- *
- * Right now that's io_ring, in the future it may
- * be remote process management and not "current"
- * at all.
- *
- * We need to fix core dumping to not do this,
- * but for now we have the mmget_still_valid()
- * model.
- */
- if (!mmget_still_valid(current->mm)) {
- mmap_write_unlock(current->mm);
+ if (mmap_write_lock_killable(mm))
return -EINTR;
- }
} else {
- mmap_read_lock(current->mm);
+ mmap_read_lock(mm);
}
/*
@@ -1120,7 +1116,7 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
* ranges, just ignore them, but return -ENOMEM at the end.
* - different from the way of handling in mlock etc.
*/
- vma = find_vma_prev(current->mm, start, &prev);
+ vma = find_vma_prev(mm, start, &prev);
if (vma && start > vma->vm_start)
prev = vma;
@@ -1157,19 +1153,92 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
if (prev)
vma = prev->vm_next;
else /* madvise_remove dropped mmap_lock */
- vma = find_vma(current->mm, start);
+ vma = find_vma(mm, start);
}
out:
blk_finish_plug(&plug);
if (write)
- mmap_write_unlock(current->mm);
+ mmap_write_unlock(mm);
else
- mmap_read_unlock(current->mm);
+ mmap_read_unlock(mm);
return error;
}
SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
{
- return do_madvise(start, len_in, behavior);
+ return do_madvise(current->mm, start, len_in, behavior);
+}
+
+SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
+ size_t, vlen, int, behavior, unsigned int, flags)
+{
+ ssize_t ret;
+ struct iovec iovstack[UIO_FASTIOV], iovec;
+ struct iovec *iov = iovstack;
+ struct iov_iter iter;
+ struct pid *pid;
+ struct task_struct *task;
+ struct mm_struct *mm;
+ size_t total_len;
+ unsigned int f_flags;
+
+ if (flags != 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
+ if (ret < 0)
+ goto out;
+
+ pid = pidfd_get_pid(pidfd, &f_flags);
+ if (IS_ERR(pid)) {
+ ret = PTR_ERR(pid);
+ goto free_iov;
+ }
+
+ task = get_pid_task(pid, PIDTYPE_PID);
+ if (!task) {
+ ret = -ESRCH;
+ goto put_pid;
+ }
+
+ if (task->mm != current->mm &&
+ !process_madvise_behavior_valid(behavior)) {
+ ret = -EINVAL;
+ goto release_task;
+ }
+
+ mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS);
+ if (IS_ERR_OR_NULL(mm)) {
+ ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
+ goto release_task;
+ }
+
+ total_len = iov_iter_count(&iter);
+
+ while (iov_iter_count(&iter)) {
+ iovec = iov_iter_iovec(&iter);
+ ret = do_madvise(mm, (unsigned long)iovec.iov_base,
+ iovec.iov_len, behavior);
+ if (ret < 0)
+ break;
+ iov_iter_advance(&iter, iovec.iov_len);
+ }
+
+ if (ret == 0)
+ ret = total_len - iov_iter_count(&iter);
+
+ mmput(mm);
+ return ret;
+
+release_task:
+ put_task_struct(task);
+put_pid:
+ put_pid(pid);
+free_iov:
+ kfree(iov);
+out:
+ return ret;
}
diff --git a/mm/memblock.c b/mm/memblock.c
index 165f40a8a254..b68ee86788af 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -48,12 +48,12 @@
* boot regardless of the possible restrictions and memory hot(un)plug;
* the ``physmem`` type is only available on some architectures.
*
- * Each region is represented by :c:type:`struct memblock_region` that
+ * Each region is represented by struct memblock_region that
* defines the region extents, its attributes and NUMA node id on NUMA
- * systems. Every memory type is described by the :c:type:`struct
- * memblock_type` which contains an array of memory regions along with
+ * systems. Every memory type is described by the struct memblock_type
+ * which contains an array of memory regions along with
* the allocator metadata. The "memory" and "reserved" types are nicely
- * wrapped with :c:type:`struct memblock`. This structure is statically
+ * wrapped with struct memblock. This structure is statically
* initialized at build time. The region arrays are initially sized to
* %INIT_MEMBLOCK_REGIONS for "memory" and %INIT_MEMBLOCK_RESERVED_REGIONS
* for "reserved". The region array for "physmem" is initially sized to
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b98911c88bab..7fe93470f59f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -73,6 +73,9 @@ EXPORT_SYMBOL(memory_cgrp_subsys);
struct mem_cgroup *root_mem_cgroup __read_mostly;
+/* Active memory cgroup to use from an interrupt context */
+DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
+
/* Socket memory accounting disabled? */
static bool cgroup_memory_nosocket;
@@ -1061,23 +1064,56 @@ struct mem_cgroup *get_mem_cgroup_from_page(struct page *page)
}
EXPORT_SYMBOL(get_mem_cgroup_from_page);
-/**
- * If current->active_memcg is non-NULL, do not fallback to current->mm->memcg.
- */
-static __always_inline struct mem_cgroup *get_mem_cgroup_from_current(void)
+static __always_inline struct mem_cgroup *active_memcg(void)
{
- if (unlikely(current->active_memcg)) {
- struct mem_cgroup *memcg;
+ if (in_interrupt())
+ return this_cpu_read(int_active_memcg);
+ else
+ return current->active_memcg;
+}
- rcu_read_lock();
+static __always_inline struct mem_cgroup *get_active_memcg(void)
+{
+ struct mem_cgroup *memcg;
+
+ rcu_read_lock();
+ memcg = active_memcg();
+ if (memcg) {
/* current->active_memcg must hold a ref. */
- if (WARN_ON_ONCE(!css_tryget(&current->active_memcg->css)))
+ if (WARN_ON_ONCE(!css_tryget(&memcg->css)))
memcg = root_mem_cgroup;
else
memcg = current->active_memcg;
- rcu_read_unlock();
- return memcg;
}
+ rcu_read_unlock();
+
+ return memcg;
+}
+
+static __always_inline bool memcg_kmem_bypass(void)
+{
+ /* Allow remote memcg charging from any context. */
+ if (unlikely(active_memcg()))
+ return false;
+
+ /* Memcg to charge can't be determined. */
+ if (in_interrupt() || !current->mm || (current->flags & PF_KTHREAD))
+ return true;
+
+ return false;
+}
+
+/**
+ * If active memcg is set, do not fallback to current->mm->memcg.
+ */
+static __always_inline struct mem_cgroup *get_mem_cgroup_from_current(void)
+{
+ if (memcg_kmem_bypass())
+ return NULL;
+
+ if (unlikely(active_memcg()))
+ return get_active_memcg();
+
return get_mem_cgroup_from_mm(current->mm);
}
@@ -2933,12 +2969,12 @@ __always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
struct obj_cgroup *objcg = NULL;
struct mem_cgroup *memcg;
- if (unlikely(!current->mm && !current->active_memcg))
+ if (memcg_kmem_bypass())
return NULL;
rcu_read_lock();
- if (unlikely(current->active_memcg))
- memcg = rcu_dereference(current->active_memcg);
+ if (unlikely(active_memcg()))
+ memcg = active_memcg();
else
memcg = mem_cgroup_from_task(current);
@@ -3059,19 +3095,16 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
struct mem_cgroup *memcg;
int ret = 0;
- if (memcg_kmem_bypass())
- return 0;
-
memcg = get_mem_cgroup_from_current();
- if (!mem_cgroup_is_root(memcg)) {
+ if (memcg && !mem_cgroup_is_root(memcg)) {
ret = __memcg_kmem_charge(memcg, gfp, 1 << order);
if (!ret) {
page->mem_cgroup = memcg;
__SetPageKmemcg(page);
return 0;
}
+ css_put(&memcg->css);
}
- css_put(&memcg->css);
return ret;
}
@@ -5296,12 +5329,12 @@ static struct cgroup_subsys_state * __ref
mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct mem_cgroup *parent = mem_cgroup_from_css(parent_css);
- struct mem_cgroup *memcg;
+ struct mem_cgroup *memcg, *old_memcg;
long error = -ENOMEM;
- memalloc_use_memcg(parent);
+ old_memcg = set_active_memcg(parent);
memcg = mem_cgroup_alloc();
- memalloc_unuse_memcg();
+ set_active_memcg(old_memcg);
if (IS_ERR(memcg))
return ERR_CAST(memcg);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 990e3b2e37d5..c0bb186bba62 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -65,6 +65,33 @@ int sysctl_memory_failure_recovery __read_mostly = 1;
atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);
+static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, bool release)
+{
+ if (hugepage_or_freepage) {
+ /*
+ * Doing this check for free pages is also fine since dissolve_free_huge_page
+ * returns 0 for non-hugetlb pages as well.
+ */
+ if (dissolve_free_huge_page(page) || !take_page_off_buddy(page))
+ /*
+ * We could fail to take off the target page from buddy
+ * for example due to racy page allocaiton, but that's
+ * acceptable because soft-offlined page is not broken
+ * and if someone really want to use it, they should
+ * take it.
+ */
+ return false;
+ }
+
+ SetPageHWPoison(page);
+ if (release)
+ put_page(page);
+ page_ref_inc(page);
+ num_poisoned_pages_inc();
+
+ return true;
+}
+
#if defined(CONFIG_HWPOISON_INJECT) || defined(CONFIG_HWPOISON_INJECT_MODULE)
u32 hwpoison_filter_enable = 0;
@@ -555,6 +582,7 @@ static const char * const action_page_types[] = {
[MF_MSG_BUDDY] = "free buddy page",
[MF_MSG_BUDDY_2ND] = "free buddy page (2nd try)",
[MF_MSG_DAX] = "dax page",
+ [MF_MSG_UNSPLIT_THP] = "unsplit thp",
[MF_MSG_UNKNOWN] = "unknown page",
};
@@ -924,7 +952,7 @@ static int page_action(struct page_state *ps, struct page *p,
* Return: return 0 if failed to grab the refcount, otherwise true (some
* non-zero value.)
*/
-int get_hwpoison_page(struct page *page)
+static int get_hwpoison_page(struct page *page)
{
struct page *head = compound_head(page);
@@ -953,7 +981,6 @@ int get_hwpoison_page(struct page *page)
return 0;
}
-EXPORT_SYMBOL_GPL(get_hwpoison_page);
/*
* Do all that is necessary to remove user space mappings. Unmap
@@ -1103,6 +1130,25 @@ static int identify_page_state(unsigned long pfn, struct page *p,
return page_action(ps, p, pfn);
}
+static int try_to_split_thp_page(struct page *page, const char *msg)
+{
+ lock_page(page);
+ if (!PageAnon(page) || unlikely(split_huge_page(page))) {
+ unsigned long pfn = page_to_pfn(page);
+
+ unlock_page(page);
+ if (!PageAnon(page))
+ pr_info("%s: %#lx: non anonymous thp\n", msg, pfn);
+ else
+ pr_info("%s: %#lx: thp split failed\n", msg, pfn);
+ put_page(page);
+ return -EBUSY;
+ }
+ unlock_page(page);
+
+ return 0;
+}
+
static int memory_failure_hugetlb(unsigned long pfn, int flags)
{
struct page *p = pfn_to_page(pfn);
@@ -1144,7 +1190,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
num_poisoned_pages_dec();
unlock_page(head);
- put_hwpoison_page(head);
+ put_page(head);
return 0;
}
@@ -1325,23 +1371,11 @@ int memory_failure(unsigned long pfn, int flags)
}
if (PageTransHuge(hpage)) {
- lock_page(p);
- if (!PageAnon(p) || unlikely(split_huge_page(p))) {
- unlock_page(p);
- if (!PageAnon(p))
- pr_err("Memory failure: %#lx: non anonymous thp\n",
- pfn);
- else
- pr_err("Memory failure: %#lx: thp split failed\n",
- pfn);
- if (TestClearPageHWPoison(p))
- num_poisoned_pages_dec();
- put_hwpoison_page(p);
+ if (try_to_split_thp_page(p, "Memory Failure") < 0) {
+ action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
return -EBUSY;
}
- unlock_page(p);
VM_BUG_ON_PAGE(!page_count(p), p);
- hpage = compound_head(p);
}
/*
@@ -1381,10 +1415,7 @@ int memory_failure(unsigned long pfn, int flags)
* page_remove_rmap() in try_to_unmap_one(). So to determine page status
* correctly, we save a copy of the page flags at this time.
*/
- if (PageHuge(p))
- page_flags = hpage->flags;
- else
- page_flags = p->flags;
+ page_flags = p->flags;
/*
* unpoison always clear PG_hwpoison inside page lock
@@ -1393,14 +1424,14 @@ int memory_failure(unsigned long pfn, int flags)
pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
num_poisoned_pages_dec();
unlock_page(p);
- put_hwpoison_page(p);
+ put_page(p);
return 0;
}
if (hwpoison_filter(p)) {
if (TestClearPageHWPoison(p))
num_poisoned_pages_dec();
unlock_page(p);
- put_hwpoison_page(p);
+ put_page(p);
return 0;
}
@@ -1416,11 +1447,8 @@ int memory_failure(unsigned long pfn, int flags)
/*
* Now take care of user space mappings.
* Abort on fail: __delete_from_page_cache() assumes unmapped page.
- *
- * When the raw error page is thp tail page, hpage points to the raw
- * page after thp split.
*/
- if (!hwpoison_user_mappings(p, pfn, flags, &hpage)) {
+ if (!hwpoison_user_mappings(p, pfn, flags, &p)) {
action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
res = -EBUSY;
goto out;
@@ -1637,24 +1665,14 @@ int unpoison_memory(unsigned long pfn)
}
unlock_page(page);
- put_hwpoison_page(page);
+ put_page(page);
if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1))
- put_hwpoison_page(page);
+ put_page(page);
return 0;
}
EXPORT_SYMBOL(unpoison_memory);
-static struct page *new_page(struct page *p, unsigned long private)
-{
- struct migration_target_control mtc = {
- .nid = page_to_nid(p),
- .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
- };
-
- return alloc_migration_target(p, (unsigned long)&mtc);
-}
-
/*
* Safely get reference count of an arbitrary page.
* Returns 0 for a free page, -EIO for a zero refcount page
@@ -1679,6 +1697,9 @@ static int __get_any_page(struct page *p, unsigned long pfn, int flags)
} else if (is_free_buddy_page(p)) {
pr_info("%s: %#lx free buddy page\n", __func__, pfn);
ret = 0;
+ } else if (page_count(p)) {
+ /* raced with allocation */
+ ret = -EBUSY;
} else {
pr_info("%s: %#lx: unknown zero refcount page type %lx\n",
__func__, pfn, p->flags);
@@ -1695,12 +1716,15 @@ static int get_any_page(struct page *page, unsigned long pfn, int flags)
{
int ret = __get_any_page(page, pfn, flags);
+ if (ret == -EBUSY)
+ ret = __get_any_page(page, pfn, flags);
+
if (ret == 1 && !PageHuge(page) &&
!PageLRU(page) && !__PageMovable(page)) {
/*
* Try to free it.
*/
- put_hwpoison_page(page);
+ put_page(page);
shake_page(page, 1);
/*
@@ -1709,7 +1733,7 @@ static int get_any_page(struct page *page, unsigned long pfn, int flags)
ret = __get_any_page(page, pfn, 0);
if (ret == 1 && !PageLRU(page)) {
/* Drop page reference which is from __get_any_page() */
- put_hwpoison_page(page);
+ put_page(page);
pr_info("soft_offline: %#lx: unknown non LRU page type %lx (%pGp)\n",
pfn, page->flags, &page->flags);
return -EIO;
@@ -1718,69 +1742,55 @@ static int get_any_page(struct page *page, unsigned long pfn, int flags)
return ret;
}
-static int soft_offline_huge_page(struct page *page, int flags)
+static bool isolate_page(struct page *page, struct list_head *pagelist)
{
- int ret;
- unsigned long pfn = page_to_pfn(page);
- struct page *hpage = compound_head(page);
- LIST_HEAD(pagelist);
+ bool isolated = false;
+ bool lru = PageLRU(page);
- /*
- * This double-check of PageHWPoison is to avoid the race with
- * memory_failure(). See also comment in __soft_offline_page().
- */
- lock_page(hpage);
- if (PageHWPoison(hpage)) {
- unlock_page(hpage);
- put_hwpoison_page(hpage);
- pr_info("soft offline: %#lx hugepage already poisoned\n", pfn);
- return -EBUSY;
+ if (PageHuge(page)) {
+ isolated = isolate_huge_page(page, pagelist);
+ } else {
+ if (lru)
+ isolated = !isolate_lru_page(page);
+ else
+ isolated = !isolate_movable_page(page, ISOLATE_UNEVICTABLE);
+
+ if (isolated)
+ list_add(&page->lru, pagelist);
}
- unlock_page(hpage);
- ret = isolate_huge_page(hpage, &pagelist);
+ if (isolated && lru)
+ inc_node_page_state(page, NR_ISOLATED_ANON +
+ page_is_file_lru(page));
+
/*
- * get_any_page() and isolate_huge_page() takes a refcount each,
- * so need to drop one here.
+ * If we succeed to isolate the page, we grabbed another refcount on
+ * the page, so we can safely drop the one we got from get_any_pages().
+ * If we failed to isolate the page, it means that we cannot go further
+ * and we will return an error, so drop the reference we got from
+ * get_any_pages() as well.
*/
- put_hwpoison_page(hpage);
- if (!ret) {
- pr_info("soft offline: %#lx hugepage failed to isolate\n", pfn);
- return -EBUSY;
- }
-
- ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
- MIGRATE_SYNC, MR_MEMORY_FAILURE);
- if (ret) {
- pr_info("soft offline: %#lx: hugepage migration failed %d, type %lx (%pGp)\n",
- pfn, ret, page->flags, &page->flags);
- if (!list_empty(&pagelist))
- putback_movable_pages(&pagelist);
- if (ret > 0)
- ret = -EIO;
- } else {
- /*
- * We set PG_hwpoison only when the migration source hugepage
- * was successfully dissolved, because otherwise hwpoisoned
- * hugepage remains on free hugepage list, then userspace will
- * find it as SIGBUS by allocation failure. That's not expected
- * in soft-offlining.
- */
- ret = dissolve_free_huge_page(page);
- if (!ret) {
- if (set_hwpoison_free_buddy_page(page))
- num_poisoned_pages_inc();
- else
- ret = -EBUSY;
- }
- }
- return ret;
+ put_page(page);
+ return isolated;
}
-static int __soft_offline_page(struct page *page, int flags)
+/*
+ * __soft_offline_page handles hugetlb-pages and non-hugetlb pages.
+ * If the page is a non-dirty unmapped page-cache page, it simply invalidates.
+ * If the page is mapped, it migrates the contents over.
+ */
+static int __soft_offline_page(struct page *page)
{
- int ret;
+ int ret = 0;
unsigned long pfn = page_to_pfn(page);
+ struct page *hpage = compound_head(page);
+ char const *msg_page[] = {"page", "hugepage"};
+ bool huge = PageHuge(page);
+ LIST_HEAD(pagelist);
+ struct migration_target_control mtc = {
+ .nid = NUMA_NO_NODE,
+ .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
+ };
/*
* Check PageHWPoison again inside page lock because PageHWPoison
@@ -1789,121 +1799,75 @@ static int __soft_offline_page(struct page *page, int flags)
* so there's no race between soft_offline_page() and memory_failure().
*/
lock_page(page);
- wait_on_page_writeback(page);
+ if (!PageHuge(page))
+ wait_on_page_writeback(page);
if (PageHWPoison(page)) {
unlock_page(page);
- put_hwpoison_page(page);
+ put_page(page);
pr_info("soft offline: %#lx page already poisoned\n", pfn);
- return -EBUSY;
+ return 0;
}
- /*
- * Try to invalidate first. This should work for
- * non dirty unmapped page cache pages.
- */
- ret = invalidate_inode_page(page);
+
+ if (!PageHuge(page))
+ /*
+ * Try to invalidate first. This should work for
+ * non dirty unmapped page cache pages.
+ */
+ ret = invalidate_inode_page(page);
unlock_page(page);
+
/*
* RED-PEN would be better to keep it isolated here, but we
* would need to fix isolation locking first.
*/
- if (ret == 1) {
- put_hwpoison_page(page);
+ if (ret) {
pr_info("soft_offline: %#lx: invalidated\n", pfn);
- SetPageHWPoison(page);
- num_poisoned_pages_inc();
+ page_handle_poison(page, false, true);
return 0;
}
- /*
- * Simple invalidation didn't work.
- * Try to migrate to a new page instead. migrate.c
- * handles a large number of cases for us.
- */
- if (PageLRU(page))
- ret = isolate_lru_page(page);
- else
- ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
- /*
- * Drop page reference which is came from get_any_page()
- * successful isolate_lru_page() already took another one.
- */
- put_hwpoison_page(page);
- if (!ret) {
- LIST_HEAD(pagelist);
- /*
- * After isolated lru page, the PageLRU will be cleared,
- * so use !__PageMovable instead for LRU page's mapping
- * cannot have PAGE_MAPPING_MOVABLE.
- */
- if (!__PageMovable(page))
- inc_node_page_state(page, NR_ISOLATED_ANON +
- page_is_file_lru(page));
- list_add(&page->lru, &pagelist);
- ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
- MIGRATE_SYNC, MR_MEMORY_FAILURE);
- if (ret) {
+ if (isolate_page(hpage, &pagelist)) {
+ ret = migrate_pages(&pagelist, alloc_migration_target, NULL,
+ (unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_FAILURE);
+ if (!ret) {
+ bool release = !huge;
+
+ if (!page_handle_poison(page, huge, release))
+ ret = -EBUSY;
+ } else {
if (!list_empty(&pagelist))
putback_movable_pages(&pagelist);
- pr_info("soft offline: %#lx: migration failed %d, type %lx (%pGp)\n",
- pfn, ret, page->flags, &page->flags);
+ pr_info("soft offline: %#lx: %s migration failed %d, type %lx (%pGp)\n",
+ pfn, msg_page[huge], ret, page->flags, &page->flags);
if (ret > 0)
ret = -EIO;
}
} else {
- pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx (%pGp)\n",
- pfn, ret, page_count(page), page->flags, &page->flags);
+ pr_info("soft offline: %#lx: %s isolation failed: %d, page count %d, type %lx (%pGp)\n",
+ pfn, msg_page[huge], ret, page_count(page), page->flags, &page->flags);
+ ret = -EBUSY;
}
return ret;
}
-static int soft_offline_in_use_page(struct page *page, int flags)
+static int soft_offline_in_use_page(struct page *page)
{
- int ret;
- int mt;
struct page *hpage = compound_head(page);
- if (!PageHuge(page) && PageTransHuge(hpage)) {
- lock_page(page);
- if (!PageAnon(page) || unlikely(split_huge_page(page))) {
- unlock_page(page);
- if (!PageAnon(page))
- pr_info("soft offline: %#lx: non anonymous thp\n", page_to_pfn(page));
- else
- pr_info("soft offline: %#lx: thp split failed\n", page_to_pfn(page));
- put_hwpoison_page(page);
+ if (!PageHuge(page) && PageTransHuge(hpage))
+ if (try_to_split_thp_page(page, "soft offline") < 0)
return -EBUSY;
- }
- unlock_page(page);
- }
-
- /*
- * Setting MIGRATE_ISOLATE here ensures that the page will be linked
- * to free list immediately (not via pcplist) when released after
- * successful page migration. Otherwise we can't guarantee that the
- * page is really free after put_page() returns, so
- * set_hwpoison_free_buddy_page() highly likely fails.
- */
- mt = get_pageblock_migratetype(page);
- set_pageblock_migratetype(page, MIGRATE_ISOLATE);
- if (PageHuge(page))
- ret = soft_offline_huge_page(page, flags);
- else
- ret = __soft_offline_page(page, flags);
- set_pageblock_migratetype(page, mt);
- return ret;
+ return __soft_offline_page(page);
}
static int soft_offline_free_page(struct page *page)
{
- int rc = dissolve_free_huge_page(page);
+ int rc = 0;
+
+ if (!page_handle_poison(page, true, false))
+ rc = -EBUSY;
- if (!rc) {
- if (set_hwpoison_free_buddy_page(page))
- num_poisoned_pages_inc();
- else
- rc = -EBUSY;
- }
return rc;
}
@@ -1933,6 +1897,7 @@ int soft_offline_page(unsigned long pfn, int flags)
{
int ret;
struct page *page;
+ bool try_again = true;
if (!pfn_valid(pfn))
return -ENXIO;
@@ -1944,18 +1909,22 @@ int soft_offline_page(unsigned long pfn, int flags)
if (PageHWPoison(page)) {
pr_info("soft offline: %#lx page already poisoned\n", pfn);
if (flags & MF_COUNT_INCREASED)
- put_hwpoison_page(page);
- return -EBUSY;
+ put_page(page);
+ return 0;
}
+retry:
get_online_mems();
ret = get_any_page(page, pfn, flags);
put_online_mems();
if (ret > 0)
- ret = soft_offline_in_use_page(page, flags);
+ ret = soft_offline_in_use_page(page);
else if (ret == 0)
- ret = soft_offline_free_page(page);
+ if (soft_offline_free_page(page) && try_again) {
+ try_again = false;
+ goto retry;
+ }
return ret;
}
diff --git a/mm/memory.c b/mm/memory.c
index 2afb01ea1307..c48f8df6e502 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2391,13 +2391,15 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
arch_enter_lazy_mmu_mode();
- do {
- if (create || !pte_none(*pte)) {
- err = fn(pte++, addr, data);
- if (err)
- break;
- }
- } while (addr += PAGE_SIZE, addr != end);
+ if (fn) {
+ do {
+ if (create || !pte_none(*pte)) {
+ err = fn(pte++, addr, data);
+ if (err)
+ break;
+ }
+ } while (addr += PAGE_SIZE, addr != end);
+ }
*mask |= PGTBL_PTE_MODIFIED;
arch_leave_lazy_mmu_mode();
@@ -3709,13 +3711,14 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
pmd_t entry;
int i;
- vm_fault_t ret;
+ vm_fault_t ret = VM_FAULT_FALLBACK;
if (!transhuge_vma_suitable(vma, haddr))
- return VM_FAULT_FALLBACK;
+ return ret;
- ret = VM_FAULT_FALLBACK;
page = compound_head(page);
+ if (compound_order(page) != HPAGE_PMD_ORDER)
+ return ret;
/*
* Archs like ppc64 need additonal space to store information
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 8e9e2d44cdad..b44d4c7ba73b 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -105,7 +105,7 @@ static struct resource *register_memory_resource(u64 start, u64 size,
unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
if (strcmp(resource_name, "System RAM"))
- flags |= IORESOURCE_MEM_DRIVER_MANAGED;
+ flags |= IORESOURCE_SYSRAM_DRIVER_MANAGED;
/*
* Make sure value parsed from 'mem=' only restricts memory adding
@@ -625,31 +625,22 @@ void generic_online_page(struct page *page, unsigned int order)
}
EXPORT_SYMBOL_GPL(generic_online_page);
-static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
- void *arg)
+static void online_pages_range(unsigned long start_pfn, unsigned long nr_pages)
{
const unsigned long end_pfn = start_pfn + nr_pages;
unsigned long pfn;
- int order;
/*
- * Online the pages. The callback might decide to keep some pages
- * PG_reserved (to add them to the buddy later), but we still account
- * them as being online/belonging to this zone ("present").
+ * Online the pages in MAX_ORDER - 1 aligned chunks. The callback might
+ * decide to not expose all pages to the buddy (e.g., expose them
+ * later). We account all pages as being online and belonging to this
+ * zone ("present").
*/
- for (pfn = start_pfn; pfn < end_pfn; pfn += 1ul << order) {
- order = min(MAX_ORDER - 1, get_order(PFN_PHYS(end_pfn - pfn)));
- /* __free_pages_core() wants pfns to be aligned to the order */
- if (WARN_ON_ONCE(!IS_ALIGNED(pfn, 1ul << order)))
- order = 0;
- (*online_page_callback)(pfn_to_page(pfn), order);
- }
+ for (pfn = start_pfn; pfn < end_pfn; pfn += MAX_ORDER_NR_PAGES)
+ (*online_page_callback)(pfn_to_page(pfn), MAX_ORDER - 1);
/* mark all involved sections as online */
online_mem_sections(start_pfn, end_pfn);
-
- *(unsigned long *)arg += nr_pages;
- return 0;
}
/* check which state of node_states will be changed when online memory */
@@ -710,9 +701,14 @@ static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned lon
* Associate the pfn range with the given zone, initializing the memmaps
* and resizing the pgdat/zone data to span the added pages. After this
* call, all affected pages are PG_reserved.
+ *
+ * All aligned pageblocks are initialized to the specified migratetype
+ * (usually MIGRATE_MOVABLE). Besides setting the migratetype, no related
+ * zone stats (e.g., nr_isolate_pageblock) are touched.
*/
void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
- unsigned long nr_pages, struct vmem_altmap *altmap)
+ unsigned long nr_pages,
+ struct vmem_altmap *altmap, int migratetype)
{
struct pglist_data *pgdat = zone->zone_pgdat;
int nid = pgdat->node_id;
@@ -737,7 +733,7 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
* are reserved so nobody should be touching them so we should be safe
*/
memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn,
- MEMINIT_HOTPLUG, altmap);
+ MEMINIT_HOTPLUG, altmap, migratetype);
set_zone_contiguous(zone);
}
@@ -803,17 +799,21 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
int online_type, int nid)
{
unsigned long flags;
- unsigned long onlined_pages = 0;
struct zone *zone;
int need_zonelists_rebuild = 0;
int ret;
struct memory_notify arg;
+ /* We can only online full sections (e.g., SECTION_IS_ONLINE) */
+ if (WARN_ON_ONCE(!nr_pages ||
+ !IS_ALIGNED(pfn | nr_pages, PAGES_PER_SECTION)))
+ return -EINVAL;
+
mem_hotplug_begin();
/* associate pfn range with the zone */
zone = zone_for_pfn_range(online_type, nid, pfn, nr_pages);
- move_pfn_range_to_zone(zone, pfn, nr_pages, NULL);
+ move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE);
arg.start_pfn = pfn;
arg.nr_pages = nr_pages;
@@ -825,6 +825,14 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
goto failed_addition;
/*
+ * Fixup the number of isolated pageblocks before marking the sections
+ * onlining, such that undo_isolate_page_range() works correctly.
+ */
+ spin_lock_irqsave(&zone->lock, flags);
+ zone->nr_isolate_pageblock += nr_pages / pageblock_nr_pages;
+ spin_unlock_irqrestore(&zone->lock, flags);
+
+ /*
* If this zone is not populated, then it is not in zonelist.
* This means the page allocator ignores this zone.
* So, zonelist must be updated after online.
@@ -834,36 +842,29 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
setup_zone_pageset(zone);
}
- ret = walk_system_ram_range(pfn, nr_pages, &onlined_pages,
- online_pages_range);
- if (ret) {
- /* not a single memory resource was applicable */
- if (need_zonelists_rebuild)
- zone_pcp_reset(zone);
- goto failed_addition;
- }
-
- zone->present_pages += onlined_pages;
+ online_pages_range(pfn, nr_pages);
+ zone->present_pages += nr_pages;
pgdat_resize_lock(zone->zone_pgdat, &flags);
- zone->zone_pgdat->node_present_pages += onlined_pages;
+ zone->zone_pgdat->node_present_pages += nr_pages;
pgdat_resize_unlock(zone->zone_pgdat, &flags);
- /*
- * When exposing larger, physically contiguous memory areas to the
- * buddy, shuffling in the buddy (when freeing onlined pages, putting
- * them either to the head or the tail of the freelist) is only helpful
- * for maintaining the shuffle, but not for creating the initial
- * shuffle. Shuffle the whole zone to make sure the just onlined pages
- * are properly distributed across the whole freelist.
- */
- shuffle_zone(zone);
-
node_states_set_node(nid, &arg);
if (need_zonelists_rebuild)
build_all_zonelists(NULL);
zone_pcp_update(zone);
+ /* Basic onlining is complete, allow allocation of onlined pages. */
+ undo_isolate_page_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE);
+
+ /*
+ * Freshly onlined pages aren't shuffled (e.g., all pages are placed to
+ * the tail of the freelist when undoing isolation). Shuffle the whole
+ * zone to make sure the just onlined pages are properly distributed
+ * across the whole freelist - to create an initial shuffle.
+ */
+ shuffle_zone(zone);
+
init_per_zone_wmark_min();
kswapd_run(nid);
@@ -1035,7 +1036,7 @@ static int online_memory_block(struct memory_block *mem, void *arg)
*
* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG
*/
-int __ref add_memory_resource(int nid, struct resource *res)
+int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
{
struct mhp_params params = { .pgprot = PAGE_KERNEL };
u64 start, size;
@@ -1088,9 +1089,8 @@ int __ref add_memory_resource(int nid, struct resource *res)
}
/* link memory sections under this node.*/
- ret = link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1),
- MEMINIT_HOTPLUG);
- BUG_ON(ret);
+ link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1),
+ MEMINIT_HOTPLUG);
/* create new memmap entry */
if (!strcmp(res->name, "System RAM"))
@@ -1099,6 +1099,13 @@ int __ref add_memory_resource(int nid, struct resource *res)
/* device_online() will take the lock when calling online_pages() */
mem_hotplug_done();
+ /*
+ * In case we're allowed to merge the resource, flag it and trigger
+ * merging now that adding succeeded.
+ */
+ if (mhp_flags & MEMHP_MERGE_RESOURCE)
+ merge_system_ram_resource(res);
+
/* online pages if requested */
if (memhp_default_online_type != MMOP_OFFLINE)
walk_memory_blocks(start, size, NULL, online_memory_block);
@@ -1115,7 +1122,7 @@ error:
}
/* requires device_hotplug_lock, see add_memory_resource() */
-int __ref __add_memory(int nid, u64 start, u64 size)
+int __ref __add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags)
{
struct resource *res;
int ret;
@@ -1124,18 +1131,18 @@ int __ref __add_memory(int nid, u64 start, u64 size)
if (IS_ERR(res))
return PTR_ERR(res);
- ret = add_memory_resource(nid, res);
+ ret = add_memory_resource(nid, res, mhp_flags);
if (ret < 0)
release_memory_resource(res);
return ret;
}
-int add_memory(int nid, u64 start, u64 size)
+int add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags)
{
int rc;
lock_device_hotplug();
- rc = __add_memory(nid, start, size);
+ rc = __add_memory(nid, start, size, mhp_flags);
unlock_device_hotplug();
return rc;
@@ -1157,14 +1164,14 @@ EXPORT_SYMBOL_GPL(add_memory);
*
* For this memory, no entries in /sys/firmware/memmap ("raw firmware-provided
* memory map") are created. Also, the created memory resource is flagged
- * with IORESOURCE_MEM_DRIVER_MANAGED, so in-kernel users can special-case
+ * with IORESOURCE_SYSRAM_DRIVER_MANAGED, so in-kernel users can special-case
* this memory as well (esp., not place kexec images onto it).
*
* The resource_name (visible via /proc/iomem) has to have the format
* "System RAM ($DRIVER)".
*/
int add_memory_driver_managed(int nid, u64 start, u64 size,
- const char *resource_name)
+ const char *resource_name, mhp_t mhp_flags)
{
struct resource *res;
int rc;
@@ -1182,7 +1189,7 @@ int add_memory_driver_managed(int nid, u64 start, u64 size,
goto out_unlock;
}
- rc = add_memory_resource(nid, res);
+ rc = add_memory_resource(nid, res, mhp_flags);
if (rc < 0)
release_memory_resource(res);
@@ -1283,27 +1290,6 @@ found:
return 0;
}
-static struct page *new_node_page(struct page *page, unsigned long private)
-{
- nodemask_t nmask = node_states[N_MEMORY];
- struct migration_target_control mtc = {
- .nid = page_to_nid(page),
- .nmask = &nmask,
- .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
- };
-
- /*
- * try to allocate from a different node but reuse this node if there
- * are no other online nodes to be used (e.g. we are offlining a part
- * of the only existing node)
- */
- node_clear(mtc.nid, nmask);
- if (nodes_empty(nmask))
- node_set(mtc.nid, nmask);
-
- return alloc_migration_target(page, (unsigned long)&mtc);
-}
-
static int
do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
{
@@ -1363,9 +1349,28 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
put_page(page);
}
if (!list_empty(&source)) {
- /* Allocate a new page from the nearest neighbor node */
- ret = migrate_pages(&source, new_node_page, NULL, 0,
- MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
+ nodemask_t nmask = node_states[N_MEMORY];
+ struct migration_target_control mtc = {
+ .nmask = &nmask,
+ .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
+ };
+
+ /*
+ * We have checked that migration range is on a single zone so
+ * we can use the nid of the first page to all the others.
+ */
+ mtc.nid = page_to_nid(list_first_entry(&source, struct page, lru));
+
+ /*
+ * try to allocate from a different node but reuse this node
+ * if there are no other online nodes to be used (e.g. we are
+ * offlining a part of the only existing node)
+ */
+ node_clear(mtc.nid, nmask);
+ if (nodes_empty(nmask))
+ node_set(mtc.nid, nmask);
+ ret = migrate_pages(&source, alloc_migration_target, NULL,
+ (unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
if (ret) {
list_for_each_entry(page, &source, lru) {
pr_warn("migrating pfn %lx failed ret:%d ",
@@ -1379,28 +1384,6 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
return ret;
}
-/* Mark all sections offline and remove all free pages from the buddy. */
-static int
-offline_isolated_pages_cb(unsigned long start, unsigned long nr_pages,
- void *data)
-{
- unsigned long *offlined_pages = (unsigned long *)data;
-
- *offlined_pages += __offline_isolated_pages(start, start + nr_pages);
- return 0;
-}
-
-/*
- * Check all pages in range, recorded as memory resource, are isolated.
- */
-static int
-check_pages_isolated_cb(unsigned long start_pfn, unsigned long nr_pages,
- void *data)
-{
- return test_pages_isolated(start_pfn, start_pfn + nr_pages,
- MEMORY_OFFLINE);
-}
-
static int __init cmdline_parse_movable_node(char *p)
{
movable_node_enabled = true;
@@ -1484,17 +1467,21 @@ static int count_system_ram_pages_cb(unsigned long start_pfn,
return 0;
}
-static int __ref __offline_pages(unsigned long start_pfn,
- unsigned long end_pfn)
+int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages)
{
- unsigned long pfn, nr_pages = 0;
- unsigned long offlined_pages = 0;
- int ret, node, nr_isolate_pageblock;
+ const unsigned long end_pfn = start_pfn + nr_pages;
+ unsigned long pfn, system_ram_pages = 0;
unsigned long flags;
struct zone *zone;
struct memory_notify arg;
+ int ret, node;
char *reason;
+ /* We can only offline full sections (e.g., SECTION_IS_ONLINE) */
+ if (WARN_ON_ONCE(!nr_pages ||
+ !IS_ALIGNED(start_pfn | nr_pages, PAGES_PER_SECTION)))
+ return -EINVAL;
+
mem_hotplug_begin();
/*
@@ -1505,9 +1492,9 @@ static int __ref __offline_pages(unsigned long start_pfn,
* memory holes PG_reserved, don't need pfn_valid() checks, and can
* avoid using walk_system_ram_range() later.
*/
- walk_system_ram_range(start_pfn, end_pfn - start_pfn, &nr_pages,
+ walk_system_ram_range(start_pfn, nr_pages, &system_ram_pages,
count_system_ram_pages_cb);
- if (nr_pages != end_pfn - start_pfn) {
+ if (system_ram_pages != nr_pages) {
ret = -EINVAL;
reason = "memory holes";
goto failed_removal;
@@ -1527,11 +1514,10 @@ static int __ref __offline_pages(unsigned long start_pfn,
ret = start_isolate_page_range(start_pfn, end_pfn,
MIGRATE_MOVABLE,
MEMORY_OFFLINE | REPORT_FAILURE);
- if (ret < 0) {
+ if (ret) {
reason = "failure to isolate range";
goto failed_removal;
}
- nr_isolate_pageblock = ret;
arg.start_pfn = start_pfn;
arg.nr_pages = nr_pages;
@@ -1581,9 +1567,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
reason = "failure to dissolve huge pages";
goto failed_removal_isolated;
}
- /* check again */
- ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn,
- NULL, check_pages_isolated_cb);
+
/*
* per-cpu pages are drained in start_isolate_page_range, but if
* there are still pages that are not free, make sure that we
@@ -1596,30 +1580,30 @@ static int __ref __offline_pages(unsigned long start_pfn,
* because has_unmovable_pages explicitly checks for
* PageBuddy on freed pages on other zones.
*/
+ ret = test_pages_isolated(start_pfn, end_pfn, MEMORY_OFFLINE);
if (ret)
drain_all_pages(zone);
} while (ret);
- /* Ok, all of our target is isolated.
- We cannot do rollback at this point. */
- walk_system_ram_range(start_pfn, end_pfn - start_pfn,
- &offlined_pages, offline_isolated_pages_cb);
- pr_info("Offlined Pages %ld\n", offlined_pages);
+ /* Mark all sections offline and remove free pages from the buddy. */
+ __offline_isolated_pages(start_pfn, end_pfn);
+ pr_info("Offlined Pages %ld\n", nr_pages);
+
/*
- * Onlining will reset pagetype flags and makes migrate type
- * MOVABLE, so just need to decrease the number of isolated
- * pageblocks zone counter here.
+ * The memory sections are marked offline, and the pageblock flags
+ * effectively stale; nobody should be touching them. Fixup the number
+ * of isolated pageblocks, memory onlining will properly revert this.
*/
spin_lock_irqsave(&zone->lock, flags);
- zone->nr_isolate_pageblock -= nr_isolate_pageblock;
+ zone->nr_isolate_pageblock -= nr_pages / pageblock_nr_pages;
spin_unlock_irqrestore(&zone->lock, flags);
/* removal success */
- adjust_managed_page_count(pfn_to_page(start_pfn), -offlined_pages);
- zone->present_pages -= offlined_pages;
+ adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages);
+ zone->present_pages -= nr_pages;
pgdat_resize_lock(zone->zone_pgdat, &flags);
- zone->zone_pgdat->node_present_pages -= offlined_pages;
+ zone->zone_pgdat->node_present_pages -= nr_pages;
pgdat_resize_unlock(zone->zone_pgdat, &flags);
init_per_zone_wmark_min();
@@ -1656,11 +1640,6 @@ failed_removal:
return ret;
}
-int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
-{
- return __offline_pages(start_pfn, start_pfn + nr_pages);
-}
-
static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
{
int ret = !is_memblock_offlined(mem);
@@ -1749,26 +1728,6 @@ void try_offline_node(int nid)
}
EXPORT_SYMBOL(try_offline_node);
-static void __release_memory_resource(resource_size_t start,
- resource_size_t size)
-{
- int ret;
-
- /*
- * When removing memory in the same granularity as it was added,
- * this function never fails. It might only fail if resources
- * have to be adjusted or split. We'll ignore the error, as
- * removing of memory cannot fail.
- */
- ret = release_mem_region_adjustable(&iomem_resource, start, size);
- if (ret) {
- resource_size_t endres = start + size - 1;
-
- pr_warn("Unable to release resource <%pa-%pa> (%d)\n",
- &start, &endres, ret);
- }
-}
-
static int __ref try_remove_memory(int nid, u64 start, u64 size)
{
int rc = 0;
@@ -1802,7 +1761,7 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size)
memblock_remove(start, size);
}
- __release_memory_resource(start, size);
+ release_mem_region_adjustable(start, size);
try_offline_node(nid);
diff --git a/mm/memremap.c b/mm/memremap.c
index 198083453182..73a206d0f645 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -266,7 +266,8 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE];
move_pfn_range_to_zone(zone, PHYS_PFN(range->start),
- PHYS_PFN(range_len(range)), params->altmap);
+ PHYS_PFN(range_len(range)), params->altmap,
+ MIGRATE_MOVABLE);
}
mem_hotplug_done();
diff --git a/mm/migrate.c b/mm/migrate.c
index f94d7c7eeddf..5ca5842df5db 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1223,16 +1223,11 @@ out:
* we want to retry.
*/
if (rc == MIGRATEPAGE_SUCCESS) {
- put_page(page);
- if (reason == MR_MEMORY_FAILURE) {
+ if (reason != MR_MEMORY_FAILURE)
/*
- * Set PG_HWPoison on just freed page
- * intentionally. Although it's rather weird,
- * it's how HWPoison flag works at the moment.
+ * We release the page in page_handle_poison.
*/
- if (set_hwpoison_free_buddy_page(page))
- num_poisoned_pages_inc();
- }
+ put_page(page);
} else {
if (rc != -EAGAIN) {
if (likely(!__PageMovable(page))) {
@@ -1869,33 +1864,27 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
return nr_pages ? -EFAULT : 0;
}
-/*
- * Move a list of pages in the address space of the currently executing
- * process.
- */
-static int kernel_move_pages(pid_t pid, unsigned long nr_pages,
- const void __user * __user *pages,
- const int __user *nodes,
- int __user *status, int flags)
+static struct mm_struct *find_mm_struct(pid_t pid, nodemask_t *mem_nodes)
{
struct task_struct *task;
struct mm_struct *mm;
- int err;
- nodemask_t task_nodes;
- /* Check flags */
- if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
- return -EINVAL;
-
- if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
- return -EPERM;
+ /*
+ * There is no need to check if current process has the right to modify
+ * the specified process when they are same.
+ */
+ if (!pid) {
+ mmget(current->mm);
+ *mem_nodes = cpuset_mems_allowed(current);
+ return current->mm;
+ }
/* Find the mm_struct */
rcu_read_lock();
- task = pid ? find_task_by_vpid(pid) : current;
+ task = find_task_by_vpid(pid);
if (!task) {
rcu_read_unlock();
- return -ESRCH;
+ return ERR_PTR(-ESRCH);
}
get_task_struct(task);
@@ -1905,22 +1894,47 @@ static int kernel_move_pages(pid_t pid, unsigned long nr_pages,
*/
if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
rcu_read_unlock();
- err = -EPERM;
+ mm = ERR_PTR(-EPERM);
goto out;
}
rcu_read_unlock();
- err = security_task_movememory(task);
- if (err)
+ mm = ERR_PTR(security_task_movememory(task));
+ if (IS_ERR(mm))
goto out;
-
- task_nodes = cpuset_mems_allowed(task);
+ *mem_nodes = cpuset_mems_allowed(task);
mm = get_task_mm(task);
+out:
put_task_struct(task);
-
if (!mm)
+ mm = ERR_PTR(-EINVAL);
+ return mm;
+}
+
+/*
+ * Move a list of pages in the address space of the currently executing
+ * process.
+ */
+static int kernel_move_pages(pid_t pid, unsigned long nr_pages,
+ const void __user * __user *pages,
+ const int __user *nodes,
+ int __user *status, int flags)
+{
+ struct mm_struct *mm;
+ int err;
+ nodemask_t task_nodes;
+
+ /* Check flags */
+ if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
return -EINVAL;
+ if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
+ return -EPERM;
+
+ mm = find_mm_struct(pid, &task_nodes);
+ if (IS_ERR(mm))
+ return PTR_ERR(mm);
+
if (nodes)
err = do_pages_move(mm, task_nodes, nr_pages, pages,
nodes, status, flags);
@@ -1929,10 +1943,6 @@ static int kernel_move_pages(pid_t pid, unsigned long nr_pages,
mmput(mm);
return err;
-
-out:
- put_task_struct(task);
- return err;
}
SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
diff --git a/mm/mmap.c b/mm/mmap.c
index 67d11ad6df24..d91ecb00d38c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -558,6 +558,50 @@ static int find_vma_links(struct mm_struct *mm, unsigned long addr,
return 0;
}
+/*
+ * vma_next() - Get the next VMA.
+ * @mm: The mm_struct.
+ * @vma: The current vma.
+ *
+ * If @vma is NULL, return the first vma in the mm.
+ *
+ * Returns: The next VMA after @vma.
+ */
+static inline struct vm_area_struct *vma_next(struct mm_struct *mm,
+ struct vm_area_struct *vma)
+{
+ if (!vma)
+ return mm->mmap;
+
+ return vma->vm_next;
+}
+
+/*
+ * munmap_vma_range() - munmap VMAs that overlap a range.
+ * @mm: The mm struct
+ * @start: The start of the range.
+ * @len: The length of the range.
+ * @pprev: pointer to the pointer that will be set to previous vm_area_struct
+ * @rb_link: the rb_node
+ * @rb_parent: the parent rb_node
+ *
+ * Find all the vm_area_struct that overlap from @start to
+ * @end and munmap them. Set @pprev to the previous vm_area_struct.
+ *
+ * Returns: -ENOMEM on munmap failure or 0 on success.
+ */
+static inline int
+munmap_vma_range(struct mm_struct *mm, unsigned long start, unsigned long len,
+ struct vm_area_struct **pprev, struct rb_node ***link,
+ struct rb_node **parent, struct list_head *uf)
+{
+
+ while (find_vma_links(mm, start, start + len, pprev, link, parent))
+ if (do_munmap(mm, start, len, uf))
+ return -ENOMEM;
+
+ return 0;
+}
static unsigned long count_vma_pages_range(struct mm_struct *mm,
unsigned long addr, unsigned long end)
{
@@ -619,7 +663,7 @@ static void __vma_link_file(struct vm_area_struct *vma)
struct address_space *mapping = file->f_mapping;
if (vma->vm_flags & VM_DENYWRITE)
- atomic_dec(&file_inode(file)->i_writecount);
+ put_write_access(file_inode(file));
if (vma->vm_flags & VM_SHARED)
mapping_allow_writable(mapping);
@@ -1128,10 +1172,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
if (vm_flags & VM_SPECIAL)
return NULL;
- if (prev)
- next = prev->vm_next;
- else
- next = mm->mmap;
+ next = vma_next(mm, prev);
area = next;
if (area && area->vm_end == end) /* cases 6, 7, 8 */
next = next->vm_next;
@@ -1707,13 +1748,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
return -ENOMEM;
}
- /* Clear old maps */
- while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
- &rb_parent)) {
- if (do_munmap(mm, addr, len, uf))
- return -ENOMEM;
- }
-
+ /* Clear old maps, set up prev, rb_link, rb_parent, and uf */
+ if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf))
+ return -ENOMEM;
/*
* Private writable mapping: check memory availability
*/
@@ -2562,7 +2599,7 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
if (vma && (vma->vm_start <= addr))
return vma;
/* don't alter vm_end if the coredump is running */
- if (!prev || !mmget_still_valid(mm) || expand_stack(prev, addr))
+ if (!prev || expand_stack(prev, addr))
return NULL;
if (prev->vm_flags & VM_LOCKED)
populate_vma_page_range(prev, addr, prev->vm_end, NULL);
@@ -2588,9 +2625,6 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
return vma;
if (!(vma->vm_flags & VM_GROWSDOWN))
return NULL;
- /* don't alter vm_start if the coredump is running */
- if (!mmget_still_valid(mm))
- return NULL;
start = vma->vm_start;
if (expand_stack(vma, addr))
return NULL;
@@ -2635,7 +2669,7 @@ static void unmap_region(struct mm_struct *mm,
struct vm_area_struct *vma, struct vm_area_struct *prev,
unsigned long start, unsigned long end)
{
- struct vm_area_struct *next = prev ? prev->vm_next : mm->mmap;
+ struct vm_area_struct *next = vma_next(mm, prev);
struct mmu_gather tlb;
lru_add_drain();
@@ -2834,7 +2868,7 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
if (error)
return error;
}
- vma = prev ? prev->vm_next : mm->mmap;
+ vma = vma_next(mm, prev);
if (unlikely(uf)) {
/*
@@ -3052,14 +3086,9 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla
if (error)
return error;
- /*
- * Clear old maps. this also does some error checking for us
- */
- while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
- &rb_parent)) {
- if (do_munmap(mm, addr, len, uf))
- return -ENOMEM;
- }
+ /* Clear old maps, set up prev, rb_link, rb_parent, and uf */
+ if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf))
+ return -ENOMEM;
/* Check against address space limits *after* clearing old maps... */
if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 4fc918163dd3..5654dd19addc 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -913,7 +913,7 @@ static int __mmu_interval_notifier_insert(
return -EOVERFLOW;
/* Must call with a mmget() held */
- if (WARN_ON(atomic_read(&mm->mm_count) <= 0))
+ if (WARN_ON(atomic_read(&mm->mm_users) <= 0))
return -EINVAL;
/* pairs with mmdrop in mmu_interval_notifier_remove() */
diff --git a/mm/nommu.c b/mm/nommu.c
index 0df7ca321314..0faf39b32cdb 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -354,13 +354,6 @@ void vm_unmap_aliases(void)
}
EXPORT_SYMBOL_GPL(vm_unmap_aliases);
-struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
-{
- BUG();
- return NULL;
-}
-EXPORT_SYMBOL_GPL(alloc_vm_area);
-
void free_vm_area(struct vm_struct *area)
{
BUG();
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 358d6f28c627..7709f0e223f5 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2849,6 +2849,7 @@ EXPORT_SYMBOL_GPL(wait_on_page_writeback);
*/
void wait_for_stable_page(struct page *page)
{
+ page = thp_head(page);
if (page->mapping->host->i_sb->s_iflags & SB_I_STABLE_WRITES)
wait_on_page_writeback(page);
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e0ff3a811ec5..23f5066bd4a5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -78,6 +78,34 @@
#include "shuffle.h"
#include "page_reporting.h"
+/* Free Page Internal flags: for internal, non-pcp variants of free_pages(). */
+typedef int __bitwise fpi_t;
+
+/* No special request */
+#define FPI_NONE ((__force fpi_t)0)
+
+/*
+ * Skip free page reporting notification for the (possibly merged) page.
+ * This does not hinder free page reporting from grabbing the page,
+ * reporting it and marking it "reported" - it only skips notifying
+ * the free page reporting infrastructure about a newly freed page. For
+ * example, used when temporarily pulling a page from a freelist and
+ * putting it back unmodified.
+ */
+#define FPI_SKIP_REPORT_NOTIFY ((__force fpi_t)BIT(0))
+
+/*
+ * Place the (possibly merged) page to the tail of the freelist. Will ignore
+ * page shuffling (relevant code - e.g., memory onlining - is expected to
+ * shuffle the whole zone).
+ *
+ * Note: No code should rely on this flag for correctness - it's purely
+ * to allow for optimizations when handing back either fresh pages
+ * (memory onlining) or untouched pages (page isolation, free page
+ * reporting).
+ */
+#define FPI_TO_TAIL ((__force fpi_t)BIT(1))
+
/* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
static DEFINE_MUTEX(pcp_batch_high_lock);
#define MIN_PERCPU_PAGELIST_FRACTION (8)
@@ -247,7 +275,8 @@ bool pm_suspended_storage(void)
unsigned int pageblock_order __read_mostly;
#endif
-static void __free_pages_ok(struct page *page, unsigned int order);
+static void __free_pages_ok(struct page *page, unsigned int order,
+ fpi_t fpi_flags);
/*
* results with 256, 32 in the lowmem_reserve sysctl:
@@ -659,7 +688,7 @@ out:
void free_compound_page(struct page *page)
{
mem_cgroup_uncharge(page);
- __free_pages_ok(page, compound_order(page));
+ __free_pages_ok(page, compound_order(page), FPI_NONE);
}
void prep_compound_page(struct page *page, unsigned int order)
@@ -763,7 +792,7 @@ static inline void clear_page_guard(struct zone *zone, struct page *page,
unsigned int order, int migratetype) {}
#endif
-static inline void set_page_order(struct page *page, unsigned int order)
+static inline void set_buddy_order(struct page *page, unsigned int order)
{
set_page_private(page, order);
__SetPageBuddy(page);
@@ -788,7 +817,7 @@ static inline bool page_is_buddy(struct page *page, struct page *buddy,
if (!page_is_guard(buddy) && !PageBuddy(buddy))
return false;
- if (page_order(buddy) != order)
+ if (buddy_order(buddy) != order)
return false;
/*
@@ -873,13 +902,17 @@ static inline void add_to_free_list_tail(struct page *page, struct zone *zone,
area->nr_free++;
}
-/* Used for pages which are on another list */
+/*
+ * Used for pages which are on another list. Move the pages to the tail
+ * of the list - so the moved pages won't immediately be considered for
+ * allocation again (e.g., optimization for memory onlining).
+ */
static inline void move_to_free_list(struct page *page, struct zone *zone,
unsigned int order, int migratetype)
{
struct free_area *area = &zone->free_area[order];
- list_move(&page->lru, &area->free_list[migratetype]);
+ list_move_tail(&page->lru, &area->free_list[migratetype]);
}
static inline void del_page_from_free_list(struct page *page, struct zone *zone,
@@ -952,7 +985,7 @@ buddy_merge_likely(unsigned long pfn, unsigned long buddy_pfn,
static inline void __free_one_page(struct page *page,
unsigned long pfn,
struct zone *zone, unsigned int order,
- int migratetype, bool report)
+ int migratetype, fpi_t fpi_flags)
{
struct capture_control *capc = task_capc(zone);
unsigned long buddy_pfn;
@@ -1026,9 +1059,11 @@ continue_merging:
}
done_merging:
- set_page_order(page, order);
+ set_buddy_order(page, order);
- if (is_shuffle_order(order))
+ if (fpi_flags & FPI_TO_TAIL)
+ to_tail = true;
+ else if (is_shuffle_order(order))
to_tail = shuffle_pick_tail();
else
to_tail = buddy_merge_likely(pfn, buddy_pfn, page, order);
@@ -1039,7 +1074,7 @@ done_merging:
add_to_free_list(page, zone, order, migratetype);
/* Notify page reporting subsystem of freed page */
- if (report)
+ if (!(fpi_flags & FPI_SKIP_REPORT_NOTIFY))
page_reporting_notify_free(order);
}
@@ -1174,6 +1209,17 @@ static __always_inline bool free_pages_prepare(struct page *page,
trace_mm_page_free(page, order);
+ if (unlikely(PageHWPoison(page)) && !order) {
+ /*
+ * Do not let hwpoison pages hit pcplists/buddy
+ * Untie memcg state and reset page's owner
+ */
+ if (memcg_kmem_enabled() && PageKmemcg(page))
+ __memcg_kmem_uncharge_page(page, order);
+ reset_page_owner(page, order);
+ return false;
+ }
+
/*
* Check tail pages before head page information is cleared to
* avoid checking PageCompound for order-0 pages.
@@ -1369,7 +1415,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
if (unlikely(isolated_pageblocks))
mt = get_pageblock_migratetype(page);
- __free_one_page(page, page_to_pfn(page), zone, 0, mt, true);
+ __free_one_page(page, page_to_pfn(page), zone, 0, mt, FPI_NONE);
trace_mm_page_pcpu_drain(page, 0, mt);
}
spin_unlock(&zone->lock);
@@ -1378,14 +1424,14 @@ static void free_pcppages_bulk(struct zone *zone, int count,
static void free_one_page(struct zone *zone,
struct page *page, unsigned long pfn,
unsigned int order,
- int migratetype)
+ int migratetype, fpi_t fpi_flags)
{
spin_lock(&zone->lock);
if (unlikely(has_isolate_pageblock(zone) ||
is_migrate_isolate(migratetype))) {
migratetype = get_pfnblock_migratetype(page, pfn);
}
- __free_one_page(page, pfn, zone, order, migratetype, true);
+ __free_one_page(page, pfn, zone, order, migratetype, fpi_flags);
spin_unlock(&zone->lock);
}
@@ -1463,7 +1509,8 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
}
}
-static void __free_pages_ok(struct page *page, unsigned int order)
+static void __free_pages_ok(struct page *page, unsigned int order,
+ fpi_t fpi_flags)
{
unsigned long flags;
int migratetype;
@@ -1475,7 +1522,8 @@ static void __free_pages_ok(struct page *page, unsigned int order)
migratetype = get_pfnblock_migratetype(page, pfn);
local_irq_save(flags);
__count_vm_events(PGFREE, 1 << order);
- free_one_page(page_zone(page), page, pfn, order, migratetype);
+ free_one_page(page_zone(page), page, pfn, order, migratetype,
+ fpi_flags);
local_irq_restore(flags);
}
@@ -1485,6 +1533,11 @@ void __free_pages_core(struct page *page, unsigned int order)
struct page *p = page;
unsigned int loop;
+ /*
+ * When initializing the memmap, __init_single_page() sets the refcount
+ * of all pages to 1 ("allocated"/"not free"). We have to set the
+ * refcount of all involved pages to 0.
+ */
prefetchw(p);
for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
prefetchw(p + 1);
@@ -1495,8 +1548,12 @@ void __free_pages_core(struct page *page, unsigned int order)
set_page_count(p, 0);
atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
- set_page_refcounted(page);
- __free_pages(page, order);
+
+ /*
+ * Bypass PCP and place fresh pages right to the tail, primarily
+ * relevant for memory onlining.
+ */
+ __free_pages_ok(page, order, FPI_TO_TAIL);
}
#ifdef CONFIG_NEED_MULTIPLE_NODES
@@ -2121,7 +2178,7 @@ static inline void expand(struct zone *zone, struct page *page,
continue;
add_to_free_list(&page[size], zone, high, migratetype);
- set_page_order(&page[size], high);
+ set_buddy_order(&page[size], high);
}
}
@@ -2299,7 +2356,7 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone,
#endif
/*
- * Move the free pages in a range to the free lists of the requested type.
+ * Move the free pages in a range to the freelist tail of the requested type.
* Note that start_page and end_pages are not aligned on a pageblock
* boundary. If alignment is required, use move_freepages_block()
*/
@@ -2335,7 +2392,7 @@ static int move_freepages(struct zone *zone,
VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
VM_BUG_ON_PAGE(page_zone(page) != zone, page);
- order = page_order(page);
+ order = buddy_order(page);
move_to_free_list(page, zone, order, migratetype);
page += 1 << order;
pages_moved += 1 << order;
@@ -2459,7 +2516,7 @@ static inline void boost_watermark(struct zone *zone)
static void steal_suitable_fallback(struct zone *zone, struct page *page,
unsigned int alloc_flags, int start_type, bool whole_block)
{
- unsigned int current_order = page_order(page);
+ unsigned int current_order = buddy_order(page);
int free_pages, movable_pages, alike_pages;
int old_block_type;
@@ -3123,7 +3180,8 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn)
*/
if (migratetype >= MIGRATE_PCPTYPES) {
if (unlikely(is_migrate_isolate(migratetype))) {
- free_one_page(zone, page, pfn, 0, migratetype);
+ free_one_page(zone, page, pfn, 0, migratetype,
+ FPI_NONE);
return;
}
migratetype = MIGRATE_MOVABLE;
@@ -3209,7 +3267,7 @@ void split_page(struct page *page, unsigned int order)
for (i = 1; i < (1 << order); i++)
set_page_refcounted(page + i);
- split_page_owner(page, order);
+ split_page_owner(page, 1 << order);
}
EXPORT_SYMBOL_GPL(split_page);
@@ -3278,7 +3336,8 @@ void __putback_isolated_page(struct page *page, unsigned int order, int mt)
lockdep_assert_held(&zone->lock);
/* Return isolated page to tail of freelist. */
- __free_one_page(page, page_to_pfn(page), zone, order, mt, false);
+ __free_one_page(page, page_to_pfn(page), zone, order, mt,
+ FPI_SKIP_REPORT_NOTIFY | FPI_TO_TAIL);
}
/*
@@ -4945,7 +5004,7 @@ static inline void free_the_page(struct page *page, unsigned int order)
if (order == 0) /* Via pcp? */
free_unref_page(page);
else
- __free_pages_ok(page, order);
+ __free_pages_ok(page, order, FPI_NONE);
}
void __free_pages(struct page *page, unsigned int order)
@@ -5979,10 +6038,15 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
* Initially all pages are reserved - free ones are freed
* up by memblock_free_all() once the early boot process is
* done. Non-atomic initialization, single-pass.
+ *
+ * All aligned pageblocks are initialized to the specified migratetype
+ * (usually MIGRATE_MOVABLE). Besides setting the migratetype, no related
+ * zone stats (e.g., nr_isolate_pageblock) are touched.
*/
void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
- unsigned long start_pfn, enum meminit_context context,
- struct vmem_altmap *altmap)
+ unsigned long start_pfn,
+ enum meminit_context context,
+ struct vmem_altmap *altmap, int migratetype)
{
unsigned long pfn, end_pfn = start_pfn + size;
struct page *page;
@@ -6026,19 +6090,12 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
__SetPageReserved(page);
/*
- * Mark the block movable so that blocks are reserved for
- * movable at startup. This will force kernel allocations
- * to reserve their blocks rather than leaking throughout
- * the address space during boot when many long-lived
- * kernel allocations are made.
- *
- * bitmap is created for zone's valid pfn range. but memmap
- * can be created for invalid pages (for alignment)
- * check here not to call set_pageblock_migratetype() against
- * pfn out of zone.
+ * Usually, we want to mark the pageblock MIGRATE_MOVABLE,
+ * such that unmovable allocations won't be scattered all
+ * over the place during system boot.
*/
- if (!(pfn & (pageblock_nr_pages - 1))) {
- set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+ if (IS_ALIGNED(pfn, pageblock_nr_pages)) {
+ set_pageblock_migratetype(page, migratetype);
cond_resched();
}
pfn++;
@@ -6100,15 +6157,10 @@ void __ref memmap_init_zone_device(struct zone *zone,
* the address space during boot when many long-lived
* kernel allocations are made.
*
- * bitmap is created for zone's valid pfn range. but memmap
- * can be created for invalid pages (for alignment)
- * check here not to call set_pageblock_migratetype() against
- * pfn out of zone.
- *
* Please note that MEMINIT_HOTPLUG path doesn't clear memmap
* because this is done early in section_activate()
*/
- if (!(pfn & (pageblock_nr_pages - 1))) {
+ if (IS_ALIGNED(pfn, pageblock_nr_pages)) {
set_pageblock_migratetype(page, MIGRATE_MOVABLE);
cond_resched();
}
@@ -6143,7 +6195,7 @@ void __meminit __weak memmap_init(unsigned long size, int nid,
if (end_pfn > start_pfn) {
size = end_pfn - start_pfn;
memmap_init_zone(size, nid, zone, start_pfn,
- MEMINIT_EARLY, NULL);
+ MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
}
}
}
@@ -8292,7 +8344,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page,
*/
if (!page_ref_count(page)) {
if (PageBuddy(page))
- iter += (1 << page_order(page)) - 1;
+ iter += (1 << buddy_order(page)) - 1;
continue;
}
@@ -8457,7 +8509,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
ret = start_isolate_page_range(pfn_max_align_down(start),
pfn_max_align_up(end), migratetype, 0);
- if (ret < 0)
+ if (ret)
return ret;
/*
@@ -8505,7 +8557,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
}
if (outer_start != start) {
- order = page_order(pfn_to_page(outer_start));
+ order = buddy_order(pfn_to_page(outer_start));
/*
* outer_start page could be small order buddy page and
@@ -8693,35 +8745,21 @@ void zone_pcp_reset(struct zone *zone)
#ifdef CONFIG_MEMORY_HOTREMOVE
/*
- * All pages in the range must be in a single zone and isolated
- * before calling this.
+ * All pages in the range must be in a single zone, must not contain holes,
+ * must span full sections, and must be isolated before calling this function.
*/
-unsigned long
-__offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
+void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
{
+ unsigned long pfn = start_pfn;
struct page *page;
struct zone *zone;
unsigned int order;
- unsigned long pfn;
unsigned long flags;
- unsigned long offlined_pages = 0;
-
- /* find the first valid pfn */
- for (pfn = start_pfn; pfn < end_pfn; pfn++)
- if (pfn_valid(pfn))
- break;
- if (pfn == end_pfn)
- return offlined_pages;
offline_mem_sections(pfn, end_pfn);
zone = page_zone(pfn_to_page(pfn));
spin_lock_irqsave(&zone->lock, flags);
- pfn = start_pfn;
while (pfn < end_pfn) {
- if (!pfn_valid(pfn)) {
- pfn++;
- continue;
- }
page = pfn_to_page(pfn);
/*
* The HWPoisoned page may be not in buddy system, and
@@ -8729,7 +8767,6 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
*/
if (unlikely(!PageBuddy(page) && PageHWPoison(page))) {
pfn++;
- offlined_pages++;
continue;
}
/*
@@ -8740,20 +8777,16 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
BUG_ON(page_count(page));
BUG_ON(PageBuddy(page));
pfn++;
- offlined_pages++;
continue;
}
BUG_ON(page_count(page));
BUG_ON(!PageBuddy(page));
- order = page_order(page);
- offlined_pages += 1 << order;
+ order = buddy_order(page);
del_page_from_free_list(page, zone, order);
pfn += (1 << order);
}
spin_unlock_irqrestore(&zone->lock, flags);
-
- return offlined_pages;
}
#endif
@@ -8768,7 +8801,7 @@ bool is_free_buddy_page(struct page *page)
for (order = 0; order < MAX_ORDER; order++) {
struct page *page_head = page - (pfn & ((1 << order) - 1));
- if (PageBuddy(page_head) && page_order(page_head) >= order)
+ if (PageBuddy(page_head) && buddy_order(page_head) >= order)
break;
}
spin_unlock_irqrestore(&zone->lock, flags);
@@ -8778,30 +8811,70 @@ bool is_free_buddy_page(struct page *page)
#ifdef CONFIG_MEMORY_FAILURE
/*
- * Set PG_hwpoison flag if a given page is confirmed to be a free page. This
- * test is performed under the zone lock to prevent a race against page
- * allocation.
+ * Break down a higher-order page in sub-pages, and keep our target out of
+ * buddy allocator.
*/
-bool set_hwpoison_free_buddy_page(struct page *page)
+static void break_down_buddy_pages(struct zone *zone, struct page *page,
+ struct page *target, int low, int high,
+ int migratetype)
+{
+ unsigned long size = 1 << high;
+ struct page *current_buddy, *next_page;
+
+ while (high > low) {
+ high--;
+ size >>= 1;
+
+ if (target >= &page[size]) {
+ next_page = page + size;
+ current_buddy = page;
+ } else {
+ next_page = page;
+ current_buddy = page + size;
+ }
+
+ if (set_page_guard(zone, current_buddy, high, migratetype))
+ continue;
+
+ if (current_buddy != target) {
+ add_to_free_list(current_buddy, zone, high, migratetype);
+ set_buddy_order(current_buddy, high);
+ page = next_page;
+ }
+ }
+}
+
+/*
+ * Take a page that will be marked as poisoned off the buddy allocator.
+ */
+bool take_page_off_buddy(struct page *page)
{
struct zone *zone = page_zone(page);
unsigned long pfn = page_to_pfn(page);
unsigned long flags;
unsigned int order;
- bool hwpoisoned = false;
+ bool ret = false;
spin_lock_irqsave(&zone->lock, flags);
for (order = 0; order < MAX_ORDER; order++) {
struct page *page_head = page - (pfn & ((1 << order) - 1));
+ int page_order = buddy_order(page_head);
- if (PageBuddy(page_head) && page_order(page_head) >= order) {
- if (!TestSetPageHWPoison(page))
- hwpoisoned = true;
+ if (PageBuddy(page_head) && page_order >= order) {
+ unsigned long pfn_head = page_to_pfn(page_head);
+ int migratetype = get_pfnblock_migratetype(page_head,
+ pfn_head);
+
+ del_page_from_free_list(page_head, zone, page_order);
+ break_down_buddy_pages(zone, page_head, page, 0,
+ page_order, migratetype);
+ ret = true;
break;
}
+ if (page_count(page_head) > 0)
+ break;
}
spin_unlock_irqrestore(&zone->lock, flags);
-
- return hwpoisoned;
+ return ret;
}
#endif
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index aa94afb63823..abbf42214485 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -88,7 +88,7 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
* these pages to be merged.
*/
if (PageBuddy(page)) {
- order = page_order(page);
+ order = buddy_order(page);
if (order >= pageblock_order) {
pfn = page_to_pfn(page);
buddy_pfn = __find_buddy_pfn(pfn, order);
@@ -106,6 +106,11 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
* If we isolate freepage with more than pageblock_order, there
* should be no freepage in the range, so we could avoid costly
* pageblock scanning for freepage moving.
+ *
+ * We didn't actually touch any of the isolated pages, so place them
+ * to the tail of the freelist. This is an optimization for memory
+ * onlining - just onlined memory won't immediately be considered for
+ * allocation.
*/
if (!isolated_page) {
nr_pages = move_freepages_block(zone, page, migratetype, NULL);
@@ -173,8 +178,7 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
* (e.g. __offline_pages will need to call it after check for isolated range for
* a next retry).
*
- * Return: the number of isolated pageblocks on success and -EBUSY if any part
- * of range cannot be isolated.
+ * Return: 0 on success and -EBUSY if any part of range cannot be isolated.
*/
int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
unsigned migratetype, int flags)
@@ -182,7 +186,6 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
unsigned long pfn;
unsigned long undo_pfn;
struct page *page;
- int nr_isolate_pageblock = 0;
BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages));
BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages));
@@ -196,10 +199,9 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
undo_pfn = pfn;
goto undo;
}
- nr_isolate_pageblock++;
}
}
- return nr_isolate_pageblock;
+ return 0;
undo:
for (pfn = start_pfn;
pfn < undo_pfn;
@@ -259,7 +261,7 @@ __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn,
* the correct MIGRATE_ISOLATE freelist. There is no
* simple way to verify that as VM_BUG_ON(), though.
*/
- pfn += 1 << page_order(page);
+ pfn += 1 << buddy_order(page);
else if ((flags & MEMORY_OFFLINE) && PageHWPoison(page))
/* A HWPoisoned page cannot be also PageBuddy */
pfn++;
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 360461509423..b735a8eafcdb 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -204,7 +204,7 @@ void __set_page_owner_migrate_reason(struct page *page, int reason)
page_owner->last_migrate_reason = reason;
}
-void __split_page_owner(struct page *page, unsigned int order)
+void __split_page_owner(struct page *page, unsigned int nr)
{
int i;
struct page_ext *page_ext = lookup_page_ext(page);
@@ -213,7 +213,7 @@ void __split_page_owner(struct page *page, unsigned int order)
if (unlikely(!page_ext))
return;
- for (i = 0; i < (1 << order); i++) {
+ for (i = 0; i < nr; i++) {
page_owner = get_page_owner(page_ext);
page_owner->order = 0;
page_ext = page_ext_next(page_ext);
@@ -295,7 +295,7 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
if (PageBuddy(page)) {
unsigned long freepage_order;
- freepage_order = page_order_unsafe(page);
+ freepage_order = buddy_order_unsafe(page);
if (freepage_order < MAX_ORDER)
pfn += (1UL << freepage_order) - 1;
continue;
@@ -490,7 +490,7 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
page = pfn_to_page(pfn);
if (PageBuddy(page)) {
- unsigned long freepage_order = page_order_unsafe(page);
+ unsigned long freepage_order = buddy_order_unsafe(page);
if (freepage_order < MAX_ORDER)
pfn += (1UL << freepage_order) - 1;
@@ -584,7 +584,7 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
* heavy lock contention.
*/
if (PageBuddy(page)) {
- unsigned long order = page_order_unsafe(page);
+ unsigned long order = buddy_order_unsafe(page);
if (order > 0 && order < MAX_ORDER)
pfn += (1UL << order) - 1;
diff --git a/mm/page_poison.c b/mm/page_poison.c
index 34b9181ee5d1..ae0482cded87 100644
--- a/mm/page_poison.c
+++ b/mm/page_poison.c
@@ -8,13 +8,23 @@
#include <linux/ratelimit.h>
#include <linux/kasan.h>
-static bool want_page_poisoning __read_mostly;
+static DEFINE_STATIC_KEY_FALSE_RO(want_page_poisoning);
static int __init early_page_poison_param(char *buf)
{
- if (!buf)
- return -EINVAL;
- return strtobool(buf, &want_page_poisoning);
+ int ret;
+ bool tmp;
+
+ ret = strtobool(buf, &tmp);
+ if (ret)
+ return ret;
+
+ if (tmp)
+ static_branch_enable(&want_page_poisoning);
+ else
+ static_branch_disable(&want_page_poisoning);
+
+ return 0;
}
early_param("page_poison", early_page_poison_param);
@@ -31,7 +41,7 @@ bool page_poisoning_enabled(void)
* Page poisoning is debug page alloc for some arches. If
* either of those options are enabled, enable poisoning.
*/
- return (want_page_poisoning ||
+ return (static_branch_unlikely(&want_page_poisoning) ||
(!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) &&
debug_pagealloc_enabled()));
}
diff --git a/mm/page_reporting.c b/mm/page_reporting.c
index 3bbd471cfc81..cd8e13d41df4 100644
--- a/mm/page_reporting.c
+++ b/mm/page_reporting.c
@@ -92,7 +92,7 @@ page_reporting_drain(struct page_reporting_dev_info *prdev,
* report on the new larger page when we make our way
* up to that higher order.
*/
- if (PageBuddy(page) && page_order(page) == order)
+ if (PageBuddy(page) && buddy_order(page) == order)
__SetPageReported(page);
} while ((sg = sg_next(sg)));
@@ -178,7 +178,7 @@ page_reporting_cycle(struct page_reporting_dev_info *prdev, struct zone *zone,
* the new head of the free list before we release the
* zone lock.
*/
- if (&page->lru != list && !list_is_first(&page->lru, list))
+ if (!list_is_first(&page->lru, list))
list_rotate_to_front(&page->lru, list);
/* release lock before waiting on report processing */
diff --git a/mm/percpu.c b/mm/percpu.c
index 1ed1a349eab8..66a93f096394 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1584,8 +1584,7 @@ static enum pcpu_chunk_type pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
{
struct obj_cgroup *objcg;
- if (!memcg_kmem_enabled() || !(gfp & __GFP_ACCOUNT) ||
- memcg_kmem_bypass())
+ if (!memcg_kmem_enabled() || !(gfp & __GFP_ACCOUNT))
return PCPU_CHUNK_ROOT;
objcg = get_obj_cgroup_from_current();
diff --git a/mm/readahead.c b/mm/readahead.c
index 3c9a8dd7c56c..c5b0457415be 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -158,10 +158,8 @@ out:
}
/**
- * page_cache_readahead_unbounded - Start unchecked readahead.
- * @mapping: File address space.
- * @file: This instance of the open file; used for authentication.
- * @index: First page index to read.
+ * page_cache_ra_unbounded - Start unchecked readahead.
+ * @ractl: Readahead control.
* @nr_to_read: The number of pages to read.
* @lookahead_size: Where to start the next readahead.
*
@@ -173,17 +171,13 @@ out:
* Context: File is referenced by caller. Mutexes may be held by caller.
* May sleep, but will not reenter filesystem to reclaim memory.
*/
-void page_cache_readahead_unbounded(struct address_space *mapping,
- struct file *file, pgoff_t index, unsigned long nr_to_read,
- unsigned long lookahead_size)
+void page_cache_ra_unbounded(struct readahead_control *ractl,
+ unsigned long nr_to_read, unsigned long lookahead_size)
{
+ struct address_space *mapping = ractl->mapping;
+ unsigned long index = readahead_index(ractl);
LIST_HEAD(page_pool);
gfp_t gfp_mask = readahead_gfp_mask(mapping);
- struct readahead_control rac = {
- .mapping = mapping,
- .file = file,
- ._index = index,
- };
unsigned long i;
/*
@@ -204,7 +198,7 @@ void page_cache_readahead_unbounded(struct address_space *mapping,
for (i = 0; i < nr_to_read; i++) {
struct page *page = xa_load(&mapping->i_pages, index + i);
- BUG_ON(index + i != rac._index + rac._nr_pages);
+ BUG_ON(index + i != ractl->_index + ractl->_nr_pages);
if (page && !xa_is_value(page)) {
/*
@@ -215,7 +209,7 @@ void page_cache_readahead_unbounded(struct address_space *mapping,
* have a stable reference to this page, and it's
* not worth getting one just for that.
*/
- read_pages(&rac, &page_pool, true);
+ read_pages(ractl, &page_pool, true);
continue;
}
@@ -228,12 +222,12 @@ void page_cache_readahead_unbounded(struct address_space *mapping,
} else if (add_to_page_cache_lru(page, mapping, index + i,
gfp_mask) < 0) {
put_page(page);
- read_pages(&rac, &page_pool, true);
+ read_pages(ractl, &page_pool, true);
continue;
}
if (i == nr_to_read - lookahead_size)
SetPageReadahead(page);
- rac._nr_pages++;
+ ractl->_nr_pages++;
}
/*
@@ -241,22 +235,22 @@ void page_cache_readahead_unbounded(struct address_space *mapping,
* uptodate then the caller will launch readpage again, and
* will then handle the error.
*/
- read_pages(&rac, &page_pool, false);
+ read_pages(ractl, &page_pool, false);
memalloc_nofs_restore(nofs);
}
-EXPORT_SYMBOL_GPL(page_cache_readahead_unbounded);
+EXPORT_SYMBOL_GPL(page_cache_ra_unbounded);
/*
- * __do_page_cache_readahead() actually reads a chunk of disk. It allocates
+ * do_page_cache_ra() actually reads a chunk of disk. It allocates
* the pages first, then submits them for I/O. This avoids the very bad
* behaviour which would occur if page allocations are causing VM writeback.
* We really don't want to intermingle reads and writes like that.
*/
-void __do_page_cache_readahead(struct address_space *mapping,
- struct file *file, pgoff_t index, unsigned long nr_to_read,
- unsigned long lookahead_size)
+void do_page_cache_ra(struct readahead_control *ractl,
+ unsigned long nr_to_read, unsigned long lookahead_size)
{
- struct inode *inode = mapping->host;
+ struct inode *inode = ractl->mapping->host;
+ unsigned long index = readahead_index(ractl);
loff_t isize = i_size_read(inode);
pgoff_t end_index; /* The last page we want to read */
@@ -270,20 +264,19 @@ void __do_page_cache_readahead(struct address_space *mapping,
if (nr_to_read > end_index - index)
nr_to_read = end_index - index + 1;
- page_cache_readahead_unbounded(mapping, file, index, nr_to_read,
- lookahead_size);
+ page_cache_ra_unbounded(ractl, nr_to_read, lookahead_size);
}
/*
* Chunk the readahead into 2 megabyte units, so that we don't pin too much
* memory at once.
*/
-void force_page_cache_readahead(struct address_space *mapping,
- struct file *filp, pgoff_t index, unsigned long nr_to_read)
+void force_page_cache_ra(struct readahead_control *ractl,
+ struct file_ra_state *ra, unsigned long nr_to_read)
{
+ struct address_space *mapping = ractl->mapping;
struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
- struct file_ra_state *ra = &filp->f_ra;
- unsigned long max_pages;
+ unsigned long max_pages, index;
if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages &&
!mapping->a_ops->readahead))
@@ -293,14 +286,16 @@ void force_page_cache_readahead(struct address_space *mapping,
* If the request exceeds the readahead window, allow the read to
* be up to the optimal hardware IO size
*/
+ index = readahead_index(ractl);
max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages);
- nr_to_read = min(nr_to_read, max_pages);
+ nr_to_read = min_t(unsigned long, nr_to_read, max_pages);
while (nr_to_read) {
unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_SIZE;
if (this_chunk > nr_to_read)
this_chunk = nr_to_read;
- __do_page_cache_readahead(mapping, filp, index, this_chunk, 0);
+ ractl->_index = index;
+ do_page_cache_ra(ractl, this_chunk, 0);
index += this_chunk;
nr_to_read -= this_chunk;
@@ -437,14 +432,14 @@ static int try_context_readahead(struct address_space *mapping,
/*
* A minimal readahead algorithm for trivial sequential/random reads.
*/
-static void ondemand_readahead(struct address_space *mapping,
- struct file_ra_state *ra, struct file *filp,
- bool hit_readahead_marker, pgoff_t index,
+static void ondemand_readahead(struct readahead_control *ractl,
+ struct file_ra_state *ra, bool hit_readahead_marker,
unsigned long req_size)
{
- struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
+ struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host);
unsigned long max_pages = ra->ra_pages;
unsigned long add_pages;
+ unsigned long index = readahead_index(ractl);
pgoff_t prev_index;
/*
@@ -482,7 +477,8 @@ static void ondemand_readahead(struct address_space *mapping,
pgoff_t start;
rcu_read_lock();
- start = page_cache_next_miss(mapping, index + 1, max_pages);
+ start = page_cache_next_miss(ractl->mapping, index + 1,
+ max_pages);
rcu_read_unlock();
if (!start || start - index > max_pages)
@@ -515,14 +511,15 @@ static void ondemand_readahead(struct address_space *mapping,
* Query the page cache and look for the traces(cached history pages)
* that a sequential stream would leave behind.
*/
- if (try_context_readahead(mapping, ra, index, req_size, max_pages))
+ if (try_context_readahead(ractl->mapping, ra, index, req_size,
+ max_pages))
goto readit;
/*
* standalone, small random read
* Read as is, and do not pollute the readahead state.
*/
- __do_page_cache_readahead(mapping, filp, index, req_size, 0);
+ do_page_cache_ra(ractl, req_size, 0);
return;
initial_readahead:
@@ -548,63 +545,42 @@ readit:
}
}
- ra_submit(ra, mapping, filp);
+ ractl->_index = ra->start;
+ do_page_cache_ra(ractl, ra->size, ra->async_size);
}
-/**
- * page_cache_sync_readahead - generic file readahead
- * @mapping: address_space which holds the pagecache and I/O vectors
- * @ra: file_ra_state which holds the readahead state
- * @filp: passed on to ->readpage() and ->readpages()
- * @index: Index of first page to be read.
- * @req_count: Total number of pages being read by the caller.
- *
- * page_cache_sync_readahead() should be called when a cache miss happened:
- * it will submit the read. The readahead logic may decide to piggyback more
- * pages onto the read request if access patterns suggest it will improve
- * performance.
- */
-void page_cache_sync_readahead(struct address_space *mapping,
- struct file_ra_state *ra, struct file *filp,
- pgoff_t index, unsigned long req_count)
+void page_cache_sync_ra(struct readahead_control *ractl,
+ struct file_ra_state *ra, unsigned long req_count)
{
- /* no read-ahead */
- if (!ra->ra_pages)
- return;
+ bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM);
- if (blk_cgroup_congested())
- return;
+ /*
+ * Even if read-ahead is disabled, issue this request as read-ahead
+ * as we'll need it to satisfy the requested range. The forced
+ * read-ahead will do the right thing and limit the read to just the
+ * requested range, which we'll set to 1 page for this case.
+ */
+ if (!ra->ra_pages || blk_cgroup_congested()) {
+ if (!ractl->file)
+ return;
+ req_count = 1;
+ do_forced_ra = true;
+ }
/* be dumb */
- if (filp && (filp->f_mode & FMODE_RANDOM)) {
- force_page_cache_readahead(mapping, filp, index, req_count);
+ if (do_forced_ra) {
+ force_page_cache_ra(ractl, ra, req_count);
return;
}
/* do read-ahead */
- ondemand_readahead(mapping, ra, filp, false, index, req_count);
+ ondemand_readahead(ractl, ra, false, req_count);
}
-EXPORT_SYMBOL_GPL(page_cache_sync_readahead);
+EXPORT_SYMBOL_GPL(page_cache_sync_ra);
-/**
- * page_cache_async_readahead - file readahead for marked pages
- * @mapping: address_space which holds the pagecache and I/O vectors
- * @ra: file_ra_state which holds the readahead state
- * @filp: passed on to ->readpage() and ->readpages()
- * @page: The page at @index which triggered the readahead call.
- * @index: Index of first page to be read.
- * @req_count: Total number of pages being read by the caller.
- *
- * page_cache_async_readahead() should be called when a page is used which
- * is marked as PageReadahead; this is a marker to suggest that the application
- * has used up enough of the readahead window that we should start pulling in
- * more pages.
- */
-void
-page_cache_async_readahead(struct address_space *mapping,
- struct file_ra_state *ra, struct file *filp,
- struct page *page, pgoff_t index,
- unsigned long req_count)
+void page_cache_async_ra(struct readahead_control *ractl,
+ struct file_ra_state *ra, struct page *page,
+ unsigned long req_count)
{
/* no read-ahead */
if (!ra->ra_pages)
@@ -621,16 +597,16 @@ page_cache_async_readahead(struct address_space *mapping,
/*
* Defer asynchronous read-ahead on IO congestion.
*/
- if (inode_read_congested(mapping->host))
+ if (inode_read_congested(ractl->mapping->host))
return;
if (blk_cgroup_congested())
return;
/* do read-ahead */
- ondemand_readahead(mapping, ra, filp, true, index, req_count);
+ ondemand_readahead(ractl, ra, true, req_count);
}
-EXPORT_SYMBOL_GPL(page_cache_async_readahead);
+EXPORT_SYMBOL_GPL(page_cache_async_ra);
ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
{
diff --git a/mm/rmap.c b/mm/rmap.c
index 9425260774a1..1b84945d655c 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1205,7 +1205,7 @@ void page_add_file_rmap(struct page *page, bool compound)
VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
lock_page_memcg(page);
if (compound && PageTransHuge(page)) {
- for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
+ for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
if (atomic_inc_and_test(&page[i]._mapcount))
nr++;
}
@@ -1246,7 +1246,7 @@ static void page_remove_file_rmap(struct page *page, bool compound)
/* page still mapped by someone else? */
if (compound && PageTransHuge(page)) {
- for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
+ for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
if (atomic_add_negative(-1, &page[i]._mapcount))
nr++;
}
@@ -1293,7 +1293,7 @@ static void page_remove_anon_compound_rmap(struct page *page)
* Subpages can be mapped with PTEs too. Check how many of
* them are still mapped.
*/
- for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
+ for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
if (atomic_add_negative(-1, &page[i]._mapcount))
nr++;
}
@@ -1303,10 +1303,10 @@ static void page_remove_anon_compound_rmap(struct page *page)
* page of the compound page is unmapped, but at least one
* small page is still mapped.
*/
- if (nr && nr < HPAGE_PMD_NR)
+ if (nr && nr < thp_nr_pages(page))
deferred_split_huge_page(page);
} else {
- nr = HPAGE_PMD_NR;
+ nr = thp_nr_pages(page);
}
if (unlikely(PageMlocked(page)))
diff --git a/mm/shmem.c b/mm/shmem.c
index 6d4ddef4a24f..537c137698f8 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3984,7 +3984,7 @@ static struct file_system_type shmem_fs_type = {
.parameters = shmem_fs_parameters,
#endif
.kill_sb = kill_litter_super,
- .fs_flags = FS_USERNS_MOUNT,
+ .fs_flags = FS_USERNS_MOUNT | FS_THP_SUPPORT,
};
int __init shmem_init(void)
diff --git a/mm/shuffle.c b/mm/shuffle.c
index 9b5cd4b004b0..9c2e145a747a 100644
--- a/mm/shuffle.c
+++ b/mm/shuffle.c
@@ -60,7 +60,7 @@ static struct page * __meminit shuffle_valid_page(struct zone *zone,
* ...is the page on the same list as the page we will
* shuffle it with?
*/
- if (page_order(page) != order)
+ if (buddy_order(page) != order)
return NULL;
return page;
diff --git a/mm/slab.c b/mm/slab.c
index 399a9d185b0f..b1113561b98b 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1062,7 +1062,7 @@ int slab_prepare_cpu(unsigned int cpu)
* Even if all the cpus of a node are down, we don't free the
* kmem_cache_node of any cache. This to avoid a race between cpu_down, and
* a kmalloc allocation from another cpu for memory from the node of
- * the cpu going down. The list3 structure is usually allocated from
+ * the cpu going down. The kmem_cache_node structure is usually allocated from
* kmem_cache_create() and gets destroyed at kmem_cache_destroy().
*/
int slab_dead_cpu(unsigned int cpu)
diff --git a/mm/slab.h b/mm/slab.h
index 6dd4b702888a..6d7c6a5056ba 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -46,7 +46,6 @@ struct kmem_cache {
#include <linux/kmemleak.h>
#include <linux/random.h>
#include <linux/sched/mm.h>
-#include <linux/kmemleak.h>
/*
* State of the slab allocator.
@@ -281,9 +280,6 @@ static inline struct obj_cgroup *memcg_slab_pre_alloc_hook(struct kmem_cache *s,
{
struct obj_cgroup *objcg;
- if (memcg_kmem_bypass())
- return NULL;
-
objcg = get_obj_cgroup_from_current();
if (!objcg)
return NULL;
diff --git a/mm/slub.c b/mm/slub.c
index 61d0d2968413..b30be2385d1c 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1956,7 +1956,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
/*
* Racy check. If we mistakenly see no partial slabs then we
* just allocate an empty slab. If we mistakenly try to get a
- * partial slab and there is none available then get_partials()
+ * partial slab and there is none available then get_partial()
* will return NULL.
*/
if (!n || !n->nr_partial)
diff --git a/mm/sparse.c b/mm/sparse.c
index b25ad8e64839..7bd23f9d6cef 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -312,6 +312,7 @@ static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long p
return coded_mem_map;
}
+#ifdef CONFIG_MEMORY_HOTPLUG
/*
* Decode mem_map from the coded memmap
*/
@@ -321,6 +322,7 @@ struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pn
coded_mem_map &= SECTION_MAP_MASK;
return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
}
+#endif /* CONFIG_MEMORY_HOTPLUG */
static void __meminit sparse_init_one_section(struct mem_section *ms,
unsigned long pnum, struct page *mem_map,
diff --git a/mm/swap_state.c b/mm/swap_state.c
index aa40e706604c..ee465827420e 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -246,7 +246,7 @@ int add_to_swap(struct page *page)
goto fail;
/*
* Normally the page will be dirtied in unmap because its pte should be
- * dirty. A special case is MADV_FREE page. The page'e pte could have
+ * dirty. A special case is MADV_FREE page. The page's pte could have
* dirty bit cleared but the page's SwapBacked bit is still set because
* clearing the dirty bit and SwapBacked bit has no lock protected. For
* such page, unmap will not set dirty bit for it, so page reclaim will
diff --git a/mm/truncate.c b/mm/truncate.c
index 6bbe0f0b3ce9..18cec39a9f53 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -168,7 +168,7 @@ void do_invalidatepage(struct page *page, unsigned int offset,
* becomes orphaned. It will be left on the LRU and may even be mapped into
* user pagetables if we're racing with filemap_fault().
*
- * We need to bale out if page->mapping is no longer equal to the original
+ * We need to bail out if page->mapping is no longer equal to the original
* mapping. This happens a) when the VM reclaimed the page while we waited on
* its lock, b) when a concurrent invalidate_mapping_pages got there first and
* c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
@@ -177,12 +177,12 @@ static void
truncate_cleanup_page(struct address_space *mapping, struct page *page)
{
if (page_mapped(page)) {
- pgoff_t nr = PageTransHuge(page) ? HPAGE_PMD_NR : 1;
+ unsigned int nr = thp_nr_pages(page);
unmap_mapping_pages(mapping, page->index, nr, false);
}
if (page_has_private(page))
- do_invalidatepage(page, 0, PAGE_SIZE);
+ do_invalidatepage(page, 0, thp_size(page));
/*
* Some filesystems seem to re-dirty the page even after
diff --git a/mm/util.c b/mm/util.c
index 4e21fe7eae27..4ddb6e186dd5 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -69,7 +69,8 @@ EXPORT_SYMBOL(kstrdup);
* @s: the string to duplicate
* @gfp: the GFP mask used in the kmalloc() call when allocating memory
*
- * Note: Strings allocated by kstrdup_const should be freed by kfree_const.
+ * Note: Strings allocated by kstrdup_const should be freed by kfree_const and
+ * must not be passed to krealloc().
*
* Return: source string if it is in .rodata section otherwise
* fallback to kstrdup.
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 04ac98bf5045..6ae491a8b210 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * linux/mm/vmalloc.c
- *
* Copyright (C) 1993 Linus Torvalds
* Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
* SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
@@ -2321,20 +2319,21 @@ static void __vfree(const void *addr)
}
/**
- * vfree - release memory allocated by vmalloc()
- * @addr: memory base address
+ * vfree - Release memory allocated by vmalloc()
+ * @addr: Memory base address
*
- * Free the virtually continuous memory area starting at @addr, as
- * obtained from vmalloc(), vmalloc_32() or __vmalloc(). If @addr is
- * NULL, no operation is performed.
+ * Free the virtually continuous memory area starting at @addr, as obtained
+ * from one of the vmalloc() family of APIs. This will usually also free the
+ * physical memory underlying the virtual allocation, but that memory is
+ * reference counted, so it will not be freed until the last user goes away.
*
- * Must not be called in NMI context (strictly speaking, only if we don't
- * have CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, but making the calling
- * conventions for vfree() arch-depenedent would be a really bad idea)
+ * If @addr is NULL, no operation is performed.
*
+ * Context:
* May sleep if called *not* from interrupt context.
- *
- * NOTE: assumes that the object at @addr has a size >= sizeof(llist_node)
+ * Must not be called in NMI context (strictly speaking, it could be
+ * if we have CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, but making the calling
+ * conventions for vfree() arch-depenedent would be a really bad idea).
*/
void vfree(const void *addr)
{
@@ -2376,8 +2375,11 @@ EXPORT_SYMBOL(vunmap);
* @flags: vm_area->flags
* @prot: page protection for the mapping
*
- * Maps @count pages from @pages into contiguous kernel virtual
- * space.
+ * Maps @count pages from @pages into contiguous kernel virtual space.
+ * If @flags contains %VM_MAP_PUT_PAGES the ownership of the pages array itself
+ * (which must be kmalloc or vmalloc memory) and one reference per pages in it
+ * are transferred from the caller to vmap(), and will be freed / dropped when
+ * vfree() is called on the return value.
*
* Return: the address of the area or %NULL on failure
*/
@@ -2403,28 +2405,73 @@ void *vmap(struct page **pages, unsigned int count,
return NULL;
}
+ if (flags & VM_MAP_PUT_PAGES)
+ area->pages = pages;
return area->addr;
}
EXPORT_SYMBOL(vmap);
+#ifdef CONFIG_VMAP_PFN
+struct vmap_pfn_data {
+ unsigned long *pfns;
+ pgprot_t prot;
+ unsigned int idx;
+};
+
+static int vmap_pfn_apply(pte_t *pte, unsigned long addr, void *private)
+{
+ struct vmap_pfn_data *data = private;
+
+ if (WARN_ON_ONCE(pfn_valid(data->pfns[data->idx])))
+ return -EINVAL;
+ *pte = pte_mkspecial(pfn_pte(data->pfns[data->idx++], data->prot));
+ return 0;
+}
+
+/**
+ * vmap_pfn - map an array of PFNs into virtually contiguous space
+ * @pfns: array of PFNs
+ * @count: number of pages to map
+ * @prot: page protection for the mapping
+ *
+ * Maps @count PFNs from @pfns into contiguous kernel virtual space and returns
+ * the start address of the mapping.
+ */
+void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot)
+{
+ struct vmap_pfn_data data = { .pfns = pfns, .prot = pgprot_nx(prot) };
+ struct vm_struct *area;
+
+ area = get_vm_area_caller(count * PAGE_SIZE, VM_IOREMAP,
+ __builtin_return_address(0));
+ if (!area)
+ return NULL;
+ if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
+ count * PAGE_SIZE, vmap_pfn_apply, &data)) {
+ free_vm_area(area);
+ return NULL;
+ }
+ return area->addr;
+}
+EXPORT_SYMBOL_GPL(vmap_pfn);
+#endif /* CONFIG_VMAP_PFN */
+
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
pgprot_t prot, int node)
{
- struct page **pages;
- unsigned int nr_pages, array_size, i;
const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
- const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN;
- const gfp_t highmem_mask = (gfp_mask & (GFP_DMA | GFP_DMA32)) ?
- 0 :
- __GFP_HIGHMEM;
+ unsigned int nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
+ unsigned int array_size = nr_pages * sizeof(struct page *), i;
+ struct page **pages;
- nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
- array_size = (nr_pages * sizeof(struct page *));
+ gfp_mask |= __GFP_NOWARN;
+ if (!(gfp_mask & (GFP_DMA | GFP_DMA32)))
+ gfp_mask |= __GFP_HIGHMEM;
/* Please note that the recursion is strictly bounded. */
if (array_size > PAGE_SIZE) {
- pages = __vmalloc_node(array_size, 1, nested_gfp|highmem_mask,
- node, area->caller);
+ pages = __vmalloc_node(array_size, 1, nested_gfp, node,
+ area->caller);
} else {
pages = kmalloc_node(array_size, nested_gfp, node);
}
@@ -2442,9 +2489,9 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
struct page *page;
if (node == NUMA_NO_NODE)
- page = alloc_page(alloc_mask|highmem_mask);
+ page = alloc_page(gfp_mask);
else
- page = alloc_pages_node(node, alloc_mask|highmem_mask, 0);
+ page = alloc_pages_node(node, gfp_mask, 0);
if (unlikely(!page)) {
/* Successfully allocated i pages, free them in __vfree() */
@@ -3032,54 +3079,6 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
}
EXPORT_SYMBOL(remap_vmalloc_range);
-static int f(pte_t *pte, unsigned long addr, void *data)
-{
- pte_t ***p = data;
-
- if (p) {
- *(*p) = pte;
- (*p)++;
- }
- return 0;
-}
-
-/**
- * alloc_vm_area - allocate a range of kernel address space
- * @size: size of the area
- * @ptes: returns the PTEs for the address space
- *
- * Returns: NULL on failure, vm_struct on success
- *
- * This function reserves a range of kernel address space, and
- * allocates pagetables to map that range. No actual mappings
- * are created.
- *
- * If @ptes is non-NULL, pointers to the PTEs (in init_mm)
- * allocated for the VM area are returned.
- */
-struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
-{
- struct vm_struct *area;
-
- area = get_vm_area_caller(size, VM_IOREMAP,
- __builtin_return_address(0));
- if (area == NULL)
- return NULL;
-
- /*
- * This ensures that page tables are constructed for this region
- * of kernel virtual address space and mapped into init_mm.
- */
- if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
- size, f, ptes ? &ptes : NULL)) {
- free_vm_area(area);
- return NULL;
- }
-
- return area;
-}
-EXPORT_SYMBOL_GPL(alloc_vm_area);
-
void free_vm_area(struct vm_struct *area)
{
struct vm_struct *ret;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 879fb57c5045..1b8f0e059767 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -725,8 +725,7 @@ static inline int is_page_cache_freeable(struct page *page)
* that isolated the page, the page cache and optional buffer
* heads at page->private.
*/
- int page_cache_pins = PageTransHuge(page) && PageSwapCache(page) ?
- HPAGE_PMD_NR : 1;
+ int page_cache_pins = thp_nr_pages(page);
return page_count(page) - page_has_private(page) == 1 + page_cache_pins;
}
@@ -2240,7 +2239,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
unsigned long anon_cost, file_cost, total_cost;
int swappiness = mem_cgroup_swappiness(memcg);
- u64 fraction[2];
+ u64 fraction[ANON_AND_FILE];
u64 denominator = 0; /* gcc */
enum scan_balance scan_balance;
unsigned long ap, fp;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 4f7b4ee6aa12..698bc0bc18d1 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -325,7 +325,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
t = __this_cpu_read(pcp->stat_threshold);
- if (unlikely(x > t || x < -t)) {
+ if (unlikely(abs(x) > t)) {
zone_page_state_add(x, zone, item);
x = 0;
}
@@ -350,7 +350,7 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
t = __this_cpu_read(pcp->stat_threshold);
- if (unlikely(x > t || x < -t)) {
+ if (unlikely(abs(x) > t)) {
node_page_state_add(x, pgdat, item);
x = 0;
}
@@ -511,7 +511,7 @@ static inline void mod_zone_state(struct zone *zone,
o = this_cpu_read(*p);
n = delta + o;
- if (n > t || n < -t) {
+ if (abs(n) > t) {
int os = overstep_mode * (t >> 1) ;
/* Overflow must be added to zone counters */
@@ -573,7 +573,7 @@ static inline void mod_node_state(struct pglist_data *pgdat,
o = this_cpu_read(*p);
n = delta + o;
- if (n > t || n < -t) {
+ if (abs(n) > t) {
int os = overstep_mode * (t >> 1) ;
/* Overflow must be added to node counters */
diff --git a/mm/workingset.c b/mm/workingset.c
index 92e66113a577..975a4d2dd02e 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -216,7 +216,7 @@ static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
/**
* workingset_age_nonresident - age non-resident entries as LRU ages
- * @memcg: the lruvec that was aged
+ * @lruvec: the lruvec that was aged
* @nr_pages: the number of pages to count
*
* As in-memory pages are aged, non-resident pages need to be aged as
@@ -519,12 +519,11 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
void *arg) __must_hold(lru_lock)
{
struct xa_node *node = container_of(item, struct xa_node, private_list);
- XA_STATE(xas, node->array, 0);
struct address_space *mapping;
int ret;
/*
- * Page cache insertions and deletions synchroneously maintain
+ * Page cache insertions and deletions synchronously maintain
* the shadow node LRU under the i_pages lock and the
* lru_lock. Because the page cache tree is emptied before
* the inode can be destroyed, holding the lru_lock pins any
@@ -559,15 +558,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
if (WARN_ON_ONCE(node->count != node->nr_values))
goto out_invalid;
mapping->nrexceptional -= node->nr_values;
- xas.xa_node = xa_parent_locked(&mapping->i_pages, node);
- xas.xa_offset = node->offset;
- xas.xa_shift = node->shift + XA_CHUNK_SHIFT;
- xas_set_update(&xas, workingset_update_node);
- /*
- * We could store a shadow entry here which was the minimum of the
- * shadow entries we were tracking ...
- */
- xas_store(&xas, NULL);
+ xa_delete_node(node, workingset_update_node);
__inc_lruvec_slab_state(node, WORKINGSET_NODERECLAIM);
out_invalid:
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index c36fdff9a371..918c7b019b3d 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1122,10 +1122,16 @@ static inline int __zs_cpu_up(struct mapping_area *area)
*/
if (area->vm)
return 0;
- area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL);
+ area->vm = get_vm_area(PAGE_SIZE * 2, 0);
if (!area->vm)
return -ENOMEM;
- return 0;
+
+ /*
+ * Populate ptes in advance to avoid pte allocation with GFP_KERNEL
+ * in non-preemtible context of zs_map_object.
+ */
+ return apply_to_page_range(&init_mm, (unsigned long)area->vm->addr,
+ PAGE_SIZE * 2, NULL, NULL);
}
static inline void __zs_cpu_down(struct mapping_area *area)
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 195b40c5dae4..3fecad369592 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -5,7 +5,7 @@
NetApp provides this source code under the GPL v2 License.
The GPL v2 license is available at
-http://opensource.org/licenses/gpl-license.php.
+https://opensource.org/licenses/gpl-license.php.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 62e0b6c1e8cf..3259120462ed 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -47,10 +47,6 @@
# define RPCDBG_FACILITY RPCDBG_CALL
#endif
-#define dprint_status(t) \
- dprintk("RPC: %5u %s (status %d)\n", t->tk_pid, \
- __func__, t->tk_status)
-
/*
* All RPC clients are linked into this list
*/
@@ -1639,10 +1635,6 @@ call_start(struct rpc_task *task)
int idx = task->tk_msg.rpc_proc->p_statidx;
trace_rpc_request(task);
- dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid,
- clnt->cl_program->name, clnt->cl_vers,
- rpc_proc_name(task),
- (RPC_IS_ASYNC(task) ? "async" : "sync"));
/* Increment call count (version might not be valid for ping) */
if (clnt->cl_program->version[clnt->cl_vers])
@@ -1658,8 +1650,6 @@ call_start(struct rpc_task *task)
static void
call_reserve(struct rpc_task *task)
{
- dprint_status(task);
-
task->tk_status = 0;
task->tk_action = call_reserveresult;
xprt_reserve(task);
@@ -1675,8 +1665,6 @@ call_reserveresult(struct rpc_task *task)
{
int status = task->tk_status;
- dprint_status(task);
-
/*
* After a call to xprt_reserve(), we must have either
* a request slot or else an error status.
@@ -1717,8 +1705,6 @@ call_reserveresult(struct rpc_task *task)
static void
call_retry_reserve(struct rpc_task *task)
{
- dprint_status(task);
-
task->tk_status = 0;
task->tk_action = call_reserveresult;
xprt_retry_reserve(task);
@@ -1730,8 +1716,6 @@ call_retry_reserve(struct rpc_task *task)
static void
call_refresh(struct rpc_task *task)
{
- dprint_status(task);
-
task->tk_action = call_refreshresult;
task->tk_status = 0;
task->tk_client->cl_stats->rpcauthrefresh++;
@@ -1746,8 +1730,6 @@ call_refreshresult(struct rpc_task *task)
{
int status = task->tk_status;
- dprint_status(task);
-
task->tk_status = 0;
task->tk_action = call_refresh;
switch (status) {
@@ -1770,12 +1752,10 @@ call_refreshresult(struct rpc_task *task)
if (!task->tk_cred_retry)
break;
task->tk_cred_retry--;
- dprintk("RPC: %5u %s: retry refresh creds\n",
- task->tk_pid, __func__);
+ trace_rpc_retry_refresh_status(task);
return;
}
- dprintk("RPC: %5u %s: refresh creds failed with error %d\n",
- task->tk_pid, __func__, status);
+ trace_rpc_refresh_status(task);
rpc_call_rpcerror(task, status);
}
@@ -1792,8 +1772,6 @@ call_allocate(struct rpc_task *task)
const struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
int status;
- dprint_status(task);
-
task->tk_status = 0;
task->tk_action = call_encode;
@@ -1823,6 +1801,7 @@ call_allocate(struct rpc_task *task)
req->rq_rcvsize <<= 2;
status = xprt->ops->buf_alloc(task);
+ trace_rpc_buf_alloc(task, status);
xprt_inject_disconnect(xprt);
if (status == 0)
return;
@@ -1831,8 +1810,6 @@ call_allocate(struct rpc_task *task)
return;
}
- dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid);
-
if (RPC_IS_ASYNC(task) || !fatal_signal_pending(current)) {
task->tk_action = call_allocate;
rpc_delay(task, HZ>>4);
@@ -1883,7 +1860,7 @@ call_encode(struct rpc_task *task)
{
if (!rpc_task_need_encode(task))
goto out;
- dprint_status(task);
+
/* Dequeue task from the receive queue while we're encoding */
xprt_request_dequeue_xprt(task);
/* Encode here so that rpcsec_gss can use correct sequence number. */
@@ -1902,8 +1879,7 @@ call_encode(struct rpc_task *task)
} else {
task->tk_action = call_refresh;
task->tk_cred_retry--;
- dprintk("RPC: %5u %s: retry refresh creds\n",
- task->tk_pid, __func__);
+ trace_rpc_retry_refresh_status(task);
}
break;
default:
@@ -1960,8 +1936,6 @@ call_bind(struct rpc_task *task)
return;
}
- dprint_status(task);
-
task->tk_action = call_bind_status;
if (!xprt_prepare_transmit(task))
return;
@@ -1983,8 +1957,6 @@ call_bind_status(struct rpc_task *task)
return;
}
- dprint_status(task);
- trace_rpc_bind_status(task);
if (task->tk_status >= 0)
goto out_next;
if (xprt_bound(xprt)) {
@@ -1994,12 +1966,10 @@ call_bind_status(struct rpc_task *task)
switch (task->tk_status) {
case -ENOMEM:
- dprintk("RPC: %5u rpcbind out of memory\n", task->tk_pid);
rpc_delay(task, HZ >> 2);
goto retry_timeout;
case -EACCES:
- dprintk("RPC: %5u remote rpcbind: RPC program/version "
- "unavailable\n", task->tk_pid);
+ trace_rpcb_prog_unavail_err(task);
/* fail immediately if this is an RPC ping */
if (task->tk_msg.rpc_proc->p_proc == 0) {
status = -EOPNOTSUPP;
@@ -2016,17 +1986,14 @@ call_bind_status(struct rpc_task *task)
case -EAGAIN:
goto retry_timeout;
case -ETIMEDOUT:
- dprintk("RPC: %5u rpcbind request timed out\n",
- task->tk_pid);
+ trace_rpcb_timeout_err(task);
goto retry_timeout;
case -EPFNOSUPPORT:
/* server doesn't support any rpcbind version we know of */
- dprintk("RPC: %5u unrecognized remote rpcbind service\n",
- task->tk_pid);
+ trace_rpcb_bind_version_err(task);
break;
case -EPROTONOSUPPORT:
- dprintk("RPC: %5u remote rpcbind version unavailable, retrying\n",
- task->tk_pid);
+ trace_rpcb_bind_version_err(task);
goto retry_timeout;
case -ECONNREFUSED: /* connection problems */
case -ECONNRESET:
@@ -2037,8 +2004,7 @@ call_bind_status(struct rpc_task *task)
case -EHOSTUNREACH:
case -ENETUNREACH:
case -EPIPE:
- dprintk("RPC: %5u remote rpcbind unreachable: %d\n",
- task->tk_pid, task->tk_status);
+ trace_rpcb_unreachable_err(task);
if (!RPC_IS_SOFTCONN(task)) {
rpc_delay(task, 5*HZ);
goto retry_timeout;
@@ -2046,8 +2012,7 @@ call_bind_status(struct rpc_task *task)
status = task->tk_status;
break;
default:
- dprintk("RPC: %5u unrecognized rpcbind error (%d)\n",
- task->tk_pid, -task->tk_status);
+ trace_rpcb_unrecognized_err(task);
}
rpc_call_rpcerror(task, status);
@@ -2079,10 +2044,6 @@ call_connect(struct rpc_task *task)
return;
}
- dprintk("RPC: %5u call_connect xprt %p %s connected\n",
- task->tk_pid, xprt,
- (xprt_connected(xprt) ? "is" : "is not"));
-
task->tk_action = call_connect_status;
if (task->tk_status < 0)
return;
@@ -2110,7 +2071,6 @@ call_connect_status(struct rpc_task *task)
return;
}
- dprint_status(task);
trace_rpc_connect_status(task);
if (task->tk_status == 0) {
@@ -2178,8 +2138,6 @@ call_transmit(struct rpc_task *task)
return;
}
- dprint_status(task);
-
task->tk_action = call_transmit_status;
if (!xprt_prepare_transmit(task))
return;
@@ -2214,7 +2172,6 @@ call_transmit_status(struct rpc_task *task)
switch (task->tk_status) {
default:
- dprint_status(task);
break;
case -EBADMSG:
task->tk_status = 0;
@@ -2296,8 +2253,6 @@ call_bc_transmit_status(struct rpc_task *task)
if (rpc_task_transmitted(task))
task->tk_status = 0;
- dprint_status(task);
-
switch (task->tk_status) {
case 0:
/* Success */
@@ -2357,8 +2312,6 @@ call_status(struct rpc_task *task)
if (!task->tk_msg.rpc_proc->p_proc)
trace_xprt_ping(task->tk_xprt, task->tk_status);
- dprint_status(task);
-
status = task->tk_status;
if (status >= 0) {
task->tk_action = call_decode;
@@ -2405,7 +2358,8 @@ call_status(struct rpc_task *task)
goto out_exit;
}
task->tk_action = call_encode;
- rpc_check_timeout(task);
+ if (status != -ECONNRESET && status != -ECONNABORTED)
+ rpc_check_timeout(task);
return;
out_exit:
rpc_call_rpcerror(task, status);
@@ -2433,7 +2387,7 @@ rpc_check_timeout(struct rpc_task *task)
if (xprt_adjust_timeout(task->tk_rqstp) == 0)
return;
- dprintk("RPC: %5u call_timeout (major)\n", task->tk_pid);
+ trace_rpc_timeout_status(task);
task->tk_timeouts++;
if (RPC_IS_SOFTCONN(task) && !rpc_check_connected(task->tk_rqstp)) {
@@ -2492,8 +2446,6 @@ call_decode(struct rpc_task *task)
struct xdr_stream xdr;
int err;
- dprint_status(task);
-
if (!task->tk_msg.rpc_proc->p_decode) {
task->tk_action = rpc_exit_task;
return;
@@ -2537,8 +2489,6 @@ out:
case 0:
task->tk_action = rpc_exit_task;
task->tk_status = rpcauth_unwrap_resp(task, &xdr);
- dprintk("RPC: %5u %s result %d\n",
- task->tk_pid, __func__, task->tk_status);
return;
case -EAGAIN:
task->tk_status = 0;
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 4a67685c83eb..38fe2ce8a5aa 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -31,11 +31,9 @@
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/xprtsock.h>
-#include "netns.h"
+#include <trace/events/sunrpc.h>
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_BIND
-#endif
+#include "netns.h"
#define RPCBIND_SOCK_PATHNAME "/var/run/rpcbind.sock"
@@ -216,10 +214,6 @@ static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt,
sn->rpcb_is_af_local = is_af_local ? 1 : 0;
smp_wmb();
sn->rpcb_users = 1;
- dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: "
- "%p, rpcb_local_clnt4: %p) for net %x%s\n",
- sn->rpcb_local_clnt, sn->rpcb_local_clnt4,
- net->ns.inum, (net == &init_net) ? " (init_net)" : "");
}
/*
@@ -261,19 +255,13 @@ static int rpcb_create_local_unix(struct net *net)
*/
clnt = rpc_create(&args);
if (IS_ERR(clnt)) {
- dprintk("RPC: failed to create AF_LOCAL rpcbind "
- "client (errno %ld).\n", PTR_ERR(clnt));
result = PTR_ERR(clnt);
goto out;
}
clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4);
- if (IS_ERR(clnt4)) {
- dprintk("RPC: failed to bind second program to "
- "rpcbind v4 client (errno %ld).\n",
- PTR_ERR(clnt4));
+ if (IS_ERR(clnt4))
clnt4 = NULL;
- }
rpcb_set_local(net, clnt, clnt4, true);
@@ -309,8 +297,6 @@ static int rpcb_create_local_net(struct net *net)
clnt = rpc_create(&args);
if (IS_ERR(clnt)) {
- dprintk("RPC: failed to create local rpcbind "
- "client (errno %ld).\n", PTR_ERR(clnt));
result = PTR_ERR(clnt);
goto out;
}
@@ -321,12 +307,8 @@ static int rpcb_create_local_net(struct net *net)
* v4 upcalls.
*/
clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4);
- if (IS_ERR(clnt4)) {
- dprintk("RPC: failed to bind second program to "
- "rpcbind v4 client (errno %ld).\n",
- PTR_ERR(clnt4));
+ if (IS_ERR(clnt4))
clnt4 = NULL;
- }
rpcb_set_local(net, clnt, clnt4, false);
@@ -403,11 +385,8 @@ static int rpcb_register_call(struct sunrpc_net *sn, struct rpc_clnt *clnt, stru
msg->rpc_resp = &result;
error = rpc_call_sync(clnt, msg, flags);
- if (error < 0) {
- dprintk("RPC: failed to contact local rpcbind "
- "server (errno %d).\n", -error);
+ if (error < 0)
return error;
- }
if (!result)
return -EACCES;
@@ -461,9 +440,7 @@ int rpcb_register(struct net *net, u32 prog, u32 vers, int prot, unsigned short
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
bool is_set = false;
- dprintk("RPC: %sregistering (%u, %u, %d, %u) with local "
- "rpcbind\n", (port ? "" : "un"),
- prog, vers, prot, port);
+ trace_pmap_register(prog, vers, prot, port);
msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET];
if (port != 0) {
@@ -489,11 +466,6 @@ static int rpcb_register_inet4(struct sunrpc_net *sn,
map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL);
- dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with "
- "local rpcbind\n", (port ? "" : "un"),
- map->r_prog, map->r_vers,
- map->r_addr, map->r_netid);
-
msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
if (port != 0) {
msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
@@ -520,11 +492,6 @@ static int rpcb_register_inet6(struct sunrpc_net *sn,
map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL);
- dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with "
- "local rpcbind\n", (port ? "" : "un"),
- map->r_prog, map->r_vers,
- map->r_addr, map->r_netid);
-
msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
if (port != 0) {
msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
@@ -541,9 +508,7 @@ static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn,
{
struct rpcbind_args *map = msg->rpc_argp;
- dprintk("RPC: unregistering [%u, %u, '%s'] with "
- "local rpcbind\n",
- map->r_prog, map->r_vers, map->r_netid);
+ trace_rpcb_unregister(map->r_prog, map->r_vers, map->r_netid);
map->r_addr = "";
msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
@@ -615,6 +580,8 @@ int rpcb_v4_register(struct net *net, const u32 program, const u32 version,
if (address == NULL)
return rpcb_unregister_all_protofamilies(sn, &msg);
+ trace_rpcb_register(map.r_prog, map.r_vers, map.r_addr, map.r_netid);
+
switch (address->sa_family) {
case AF_INET:
return rpcb_register_inet4(sn, address, &msg);
@@ -693,18 +660,12 @@ void rpcb_getport_async(struct rpc_task *task)
rcu_read_unlock();
xprt = xprt_get(task->tk_xprt);
- dprintk("RPC: %5u %s(%s, %u, %u, %d)\n",
- task->tk_pid, __func__,
- xprt->servername, clnt->cl_prog, clnt->cl_vers, xprt->prot);
-
/* Put self on the wait queue to ensure we get notified if
* some other task is already attempting to bind the port */
rpc_sleep_on_timeout(&xprt->binding, task,
NULL, jiffies + xprt->bind_timeout);
if (xprt_test_and_set_binding(xprt)) {
- dprintk("RPC: %5u %s: waiting for another binder\n",
- task->tk_pid, __func__);
xprt_put(xprt);
return;
}
@@ -712,8 +673,6 @@ void rpcb_getport_async(struct rpc_task *task)
/* Someone else may have bound if we slept */
if (xprt_bound(xprt)) {
status = 0;
- dprintk("RPC: %5u %s: already bound\n",
- task->tk_pid, __func__);
goto bailout_nofree;
}
@@ -732,20 +691,15 @@ void rpcb_getport_async(struct rpc_task *task)
break;
default:
status = -EAFNOSUPPORT;
- dprintk("RPC: %5u %s: bad address family\n",
- task->tk_pid, __func__);
goto bailout_nofree;
}
if (proc == NULL) {
xprt->bind_index = 0;
status = -EPFNOSUPPORT;
- dprintk("RPC: %5u %s: no more getport versions available\n",
- task->tk_pid, __func__);
goto bailout_nofree;
}
- dprintk("RPC: %5u %s: trying rpcbind version %u\n",
- task->tk_pid, __func__, bind_version);
+ trace_rpcb_getport(clnt, task, bind_version);
rpcb_clnt = rpcb_create(xprt->xprt_net,
clnt->cl_nodename,
@@ -754,16 +708,12 @@ void rpcb_getport_async(struct rpc_task *task)
clnt->cl_cred);
if (IS_ERR(rpcb_clnt)) {
status = PTR_ERR(rpcb_clnt);
- dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n",
- task->tk_pid, __func__, PTR_ERR(rpcb_clnt));
goto bailout_nofree;
}
map = kzalloc(sizeof(struct rpcbind_args), GFP_NOFS);
if (!map) {
status = -ENOMEM;
- dprintk("RPC: %5u %s: no memory available\n",
- task->tk_pid, __func__);
goto bailout_release_client;
}
map->r_prog = clnt->cl_prog;
@@ -780,8 +730,6 @@ void rpcb_getport_async(struct rpc_task *task)
map->r_addr = rpc_sockaddr2uaddr(sap, GFP_NOFS);
if (!map->r_addr) {
status = -ENOMEM;
- dprintk("RPC: %5u %s: no memory available\n",
- task->tk_pid, __func__);
goto bailout_free_args;
}
map->r_owner = "";
@@ -818,34 +766,33 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)
{
struct rpcbind_args *map = data;
struct rpc_xprt *xprt = map->r_xprt;
- int status = child->tk_status;
+
+ map->r_status = child->tk_status;
/* Garbage reply: retry with a lesser rpcbind version */
- if (status == -EIO)
- status = -EPROTONOSUPPORT;
+ if (map->r_status == -EIO)
+ map->r_status = -EPROTONOSUPPORT;
/* rpcbind server doesn't support this rpcbind protocol version */
- if (status == -EPROTONOSUPPORT)
+ if (map->r_status == -EPROTONOSUPPORT)
xprt->bind_index++;
- if (status < 0) {
+ if (map->r_status < 0) {
/* rpcbind server not available on remote host? */
- xprt->ops->set_port(xprt, 0);
+ map->r_port = 0;
+
} else if (map->r_port == 0) {
/* Requested RPC service wasn't registered on remote host */
- xprt->ops->set_port(xprt, 0);
- status = -EACCES;
+ map->r_status = -EACCES;
} else {
/* Succeeded */
- xprt->ops->set_port(xprt, map->r_port);
- xprt_set_bound(xprt);
- status = 0;
+ map->r_status = 0;
}
- dprintk("RPC: %5u rpcb_getport_done(status %d, port %u)\n",
- child->tk_pid, status, map->r_port);
-
- map->r_status = status;
+ trace_rpcb_setport(child, map->r_status, map->r_port);
+ xprt->ops->set_port(xprt, map->r_port);
+ if (map->r_port)
+ xprt_set_bound(xprt);
}
/*
@@ -858,11 +805,6 @@ static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr,
const struct rpcbind_args *rpcb = data;
__be32 *p;
- dprintk("RPC: %5u encoding PMAP_%s call (%u, %u, %d, %u)\n",
- req->rq_task->tk_pid,
- req->rq_task->tk_msg.rpc_proc->p_name,
- rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port);
-
p = xdr_reserve_space(xdr, RPCB_mappingargs_sz << 2);
*p++ = cpu_to_be32(rpcb->r_prog);
*p++ = cpu_to_be32(rpcb->r_vers);
@@ -884,8 +826,6 @@ static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr,
return -EIO;
port = be32_to_cpup(p);
- dprintk("RPC: %5u PMAP_%s result: %lu\n", req->rq_task->tk_pid,
- req->rq_task->tk_msg.rpc_proc->p_name, port);
if (unlikely(port > USHRT_MAX))
return -EIO;
@@ -906,11 +846,6 @@ static int rpcb_dec_set(struct rpc_rqst *req, struct xdr_stream *xdr,
*boolp = 0;
if (*p != xdr_zero)
*boolp = 1;
-
- dprintk("RPC: %5u RPCB_%s call %s\n",
- req->rq_task->tk_pid,
- req->rq_task->tk_msg.rpc_proc->p_name,
- (*boolp ? "succeeded" : "failed"));
return 0;
}
@@ -935,12 +870,6 @@ static void rpcb_enc_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
const struct rpcbind_args *rpcb = data;
__be32 *p;
- dprintk("RPC: %5u encoding RPCB_%s call (%u, %u, '%s', '%s')\n",
- req->rq_task->tk_pid,
- req->rq_task->tk_msg.rpc_proc->p_name,
- rpcb->r_prog, rpcb->r_vers,
- rpcb->r_netid, rpcb->r_addr);
-
p = xdr_reserve_space(xdr, (RPCB_program_sz + RPCB_version_sz) << 2);
*p++ = cpu_to_be32(rpcb->r_prog);
*p = cpu_to_be32(rpcb->r_vers);
@@ -970,11 +899,8 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
* If the returned universal address is a null string,
* the requested RPC service was not registered.
*/
- if (len == 0) {
- dprintk("RPC: %5u RPCB reply: program not registered\n",
- req->rq_task->tk_pid);
+ if (len == 0)
return 0;
- }
if (unlikely(len > RPCBIND_MAXUADDRLEN))
goto out_fail;
@@ -982,8 +908,6 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
p = xdr_inline_decode(xdr, len);
if (unlikely(p == NULL))
goto out_fail;
- dprintk("RPC: %5u RPCB_%s reply: %*pE\n", req->rq_task->tk_pid,
- req->rq_task->tk_msg.rpc_proc->p_name, len, (char *)p);
if (rpc_uaddr2sockaddr(req->rq_xprt->xprt_net, (char *)p, len,
sap, sizeof(address)) == 0)
@@ -993,9 +917,6 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
return 0;
out_fail:
- dprintk("RPC: %5u malformed RPCB_%s reply\n",
- req->rq_task->tk_pid,
- req->rq_task->tk_msg.rpc_proc->p_name);
return -EIO;
}
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 7eba20a88438..f06d7c315017 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -27,10 +27,6 @@
#include "sunrpc.h"
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-#define RPCDBG_FACILITY RPCDBG_SCHED
-#endif
-
#define CREATE_TRACE_POINTS
#include <trace/events/sunrpc.h>
@@ -85,7 +81,6 @@ __rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
{
if (list_empty(&task->u.tk_wait.timer_list))
return;
- dprintk("RPC: %5u disabling timer\n", task->tk_pid);
task->tk_timeout = 0;
list_del(&task->u.tk_wait.timer_list);
if (list_empty(&queue->timer_list.list))
@@ -111,9 +106,6 @@ static void
__rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task,
unsigned long timeout)
{
- dprintk("RPC: %5u setting alarm for %u ms\n",
- task->tk_pid, jiffies_to_msecs(timeout - jiffies));
-
task->tk_timeout = timeout;
if (list_empty(&queue->timer_list.list) || time_before(timeout, queue->timer_list.expires))
rpc_set_queue_timer(queue, timeout);
@@ -216,9 +208,6 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
/* barrier matches the read in rpc_wake_up_task_queue_locked() */
smp_wmb();
rpc_set_queued(task);
-
- dprintk("RPC: %5u added to queue %p \"%s\"\n",
- task->tk_pid, queue, rpc_qname(queue));
}
/*
@@ -241,8 +230,6 @@ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_tas
else
list_del(&task->u.tk_wait.list);
queue->qlen--;
- dprintk("RPC: %5u removed from queue %p \"%s\"\n",
- task->tk_pid, queue, rpc_qname(queue));
}
static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, unsigned char nr_queues)
@@ -382,13 +369,9 @@ static void __rpc_do_sleep_on_priority(struct rpc_wait_queue *q,
struct rpc_task *task,
unsigned char queue_priority)
{
- dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n",
- task->tk_pid, rpc_qname(q), jiffies);
-
trace_rpc_task_sleep(task, q);
__rpc_add_wait_queue(q, task, queue_priority);
-
}
static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
@@ -510,9 +493,6 @@ static void __rpc_do_wake_up_task_on_wq(struct workqueue_struct *wq,
struct rpc_wait_queue *queue,
struct rpc_task *task)
{
- dprintk("RPC: %5u __rpc_wake_up_task (now %lu)\n",
- task->tk_pid, jiffies);
-
/* Has the task been executed yet? If not, we cannot wake it up! */
if (!RPC_IS_ACTIVATED(task)) {
printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task);
@@ -524,8 +504,6 @@ static void __rpc_do_wake_up_task_on_wq(struct workqueue_struct *wq,
__rpc_remove_wait_queue(queue, task);
rpc_make_runnable(wq, task);
-
- dprintk("RPC: __rpc_wake_up_task done\n");
}
/*
@@ -663,8 +641,6 @@ struct rpc_task *rpc_wake_up_first_on_wq(struct workqueue_struct *wq,
{
struct rpc_task *task = NULL;
- dprintk("RPC: wake_up_first(%p \"%s\")\n",
- queue, rpc_qname(queue));
spin_lock(&queue->lock);
task = __rpc_find_next_queued(queue);
if (task != NULL)
@@ -770,7 +746,7 @@ static void __rpc_queue_timer_fn(struct work_struct *work)
list_for_each_entry_safe(task, n, &queue->timer_list.list, u.tk_wait.timer_list) {
timeo = task->tk_timeout;
if (time_after_eq(now, timeo)) {
- dprintk("RPC: %5u timeout\n", task->tk_pid);
+ trace_rpc_task_timeout(task, task->tk_action);
task->tk_status = -ETIMEDOUT;
rpc_wake_up_task_queue_locked(queue, task);
continue;
@@ -885,9 +861,6 @@ static void __rpc_execute(struct rpc_task *task)
int task_is_async = RPC_IS_ASYNC(task);
int status = 0;
- dprintk("RPC: %5u __rpc_execute flags=0x%x\n",
- task->tk_pid, task->tk_flags);
-
WARN_ON_ONCE(RPC_IS_QUEUED(task));
if (RPC_IS_QUEUED(task))
return;
@@ -947,7 +920,7 @@ static void __rpc_execute(struct rpc_task *task)
return;
/* sync task: sleep here */
- dprintk("RPC: %5u sync task going to sleep\n", task->tk_pid);
+ trace_rpc_task_sync_sleep(task, task->tk_action);
status = out_of_line_wait_on_bit(&task->tk_runstate,
RPC_TASK_QUEUED, rpc_wait_bit_killable,
TASK_KILLABLE);
@@ -963,11 +936,9 @@ static void __rpc_execute(struct rpc_task *task)
task->tk_rpc_status = -ERESTARTSYS;
rpc_exit(task, -ERESTARTSYS);
}
- dprintk("RPC: %5u sync task resuming\n", task->tk_pid);
+ trace_rpc_task_sync_wake(task, task->tk_action);
}
- dprintk("RPC: %5u return %d, status %d\n", task->tk_pid, status,
- task->tk_status);
/* Release all resources associated with the task */
rpc_release_task(task);
}
@@ -1036,8 +1007,6 @@ int rpc_malloc(struct rpc_task *task)
return -ENOMEM;
buf->len = size;
- dprintk("RPC: %5u allocated buffer of size %zu at %p\n",
- task->tk_pid, size, buf);
rqst->rq_buffer = buf->data;
rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize;
return 0;
@@ -1058,9 +1027,6 @@ void rpc_free(struct rpc_task *task)
buf = container_of(buffer, struct rpc_buffer, data);
size = buf->len;
- dprintk("RPC: freeing buffer of size %zu at %p\n",
- size, buf);
-
if (size <= RPC_BUFFER_MAXSIZE)
mempool_free(buf, rpc_buffer_mempool);
else
@@ -1095,9 +1061,6 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
task->tk_action = rpc_prepare_task;
rpc_init_task_statistics(task);
-
- dprintk("RPC: new task initialized, procpid %u\n",
- task_pid_nr(current));
}
static struct rpc_task *
@@ -1121,7 +1084,6 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data)
rpc_init_task(task, setup_data);
task->tk_flags |= flags;
- dprintk("RPC: allocated task %p\n", task);
return task;
}
@@ -1151,10 +1113,8 @@ static void rpc_free_task(struct rpc_task *task)
put_rpccred(task->tk_op_cred);
rpc_release_calldata(task->tk_ops, task->tk_calldata);
- if (tk_flags & RPC_TASK_DYNAMIC) {
- dprintk("RPC: %5u freeing task\n", task->tk_pid);
+ if (tk_flags & RPC_TASK_DYNAMIC)
mempool_free(task, rpc_task_mempool);
- }
}
static void rpc_async_release(struct work_struct *work)
@@ -1208,8 +1168,6 @@ EXPORT_SYMBOL_GPL(rpc_put_task_async);
static void rpc_release_task(struct rpc_task *task)
{
- dprintk("RPC: %5u release task\n", task->tk_pid);
-
WARN_ON_ONCE(RPC_IS_QUEUED(task));
rpc_release_resources_task(task);
@@ -1250,7 +1208,6 @@ static int rpciod_start(void)
/*
* Create the rpciod thread and wait for it to start.
*/
- dprintk("RPC: creating workqueue rpciod\n");
wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
if (!wq)
goto out_failed;
@@ -1275,7 +1232,6 @@ static void rpciod_stop(void)
if (rpciod_workqueue == NULL)
return;
- dprintk("RPC: destroying workqueue rpciod\n");
wq = rpciod_workqueue;
rpciod_workqueue = NULL;
diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h
index f6fe2e6cd65a..2f59464e6524 100644
--- a/net/sunrpc/sunrpc.h
+++ b/net/sunrpc/sunrpc.h
@@ -4,7 +4,7 @@
NetApp provides this source code under the GPL v2 License.
The GPL v2 license is available at
-http://opensource.org/licenses/gpl-license.php.
+https://opensource.org/licenses/gpl-license.php.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index be11d672b5b9..3feff529a764 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -19,6 +19,9 @@
#include <linux/bvec.h>
#include <trace/events/sunrpc.h>
+static void _copy_to_pages(struct page **, size_t, const char *, size_t);
+
+
/*
* XDR functions for basic NFS types
*/
@@ -202,6 +205,88 @@ EXPORT_SYMBOL_GPL(xdr_inline_pages);
*/
/**
+ * _shift_data_left_pages
+ * @pages: vector of pages containing both the source and dest memory area.
+ * @pgto_base: page vector address of destination
+ * @pgfrom_base: page vector address of source
+ * @len: number of bytes to copy
+ *
+ * Note: the addresses pgto_base and pgfrom_base are both calculated in
+ * the same way:
+ * if a memory area starts at byte 'base' in page 'pages[i]',
+ * then its address is given as (i << PAGE_CACHE_SHIFT) + base
+ * Alse note: pgto_base must be < pgfrom_base, but the memory areas
+ * they point to may overlap.
+ */
+static void
+_shift_data_left_pages(struct page **pages, size_t pgto_base,
+ size_t pgfrom_base, size_t len)
+{
+ struct page **pgfrom, **pgto;
+ char *vfrom, *vto;
+ size_t copy;
+
+ BUG_ON(pgfrom_base <= pgto_base);
+
+ pgto = pages + (pgto_base >> PAGE_SHIFT);
+ pgfrom = pages + (pgfrom_base >> PAGE_SHIFT);
+
+ pgto_base &= ~PAGE_MASK;
+ pgfrom_base &= ~PAGE_MASK;
+
+ do {
+ if (pgto_base >= PAGE_SIZE) {
+ pgto_base = 0;
+ pgto++;
+ }
+ if (pgfrom_base >= PAGE_SIZE){
+ pgfrom_base = 0;
+ pgfrom++;
+ }
+
+ copy = len;
+ if (copy > (PAGE_SIZE - pgto_base))
+ copy = PAGE_SIZE - pgto_base;
+ if (copy > (PAGE_SIZE - pgfrom_base))
+ copy = PAGE_SIZE - pgfrom_base;
+
+ vto = kmap_atomic(*pgto);
+ if (*pgto != *pgfrom) {
+ vfrom = kmap_atomic(*pgfrom);
+ memcpy(vto + pgto_base, vfrom + pgfrom_base, copy);
+ kunmap_atomic(vfrom);
+ } else
+ memmove(vto + pgto_base, vto + pgfrom_base, copy);
+ flush_dcache_page(*pgto);
+ kunmap_atomic(vto);
+
+ pgto_base += copy;
+ pgfrom_base += copy;
+
+ } while ((len -= copy) != 0);
+}
+
+static void
+_shift_data_left_tail(struct xdr_buf *buf, unsigned int pgto, size_t len)
+{
+ struct kvec *tail = buf->tail;
+
+ if (len > tail->iov_len)
+ len = tail->iov_len;
+
+ _copy_to_pages(buf->pages,
+ buf->page_base + pgto,
+ (char *)tail->iov_base,
+ len);
+ tail->iov_len -= len;
+
+ if (tail->iov_len > 0)
+ memmove((char *)tail->iov_base,
+ tail->iov_base + len,
+ tail->iov_len);
+}
+
+/**
* _shift_data_right_pages
* @pages: vector of pages containing both the source and dest memory area.
* @pgto_base: page vector address of destination
@@ -266,6 +351,46 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base,
} while ((len -= copy) != 0);
}
+static unsigned int
+_shift_data_right_tail(struct xdr_buf *buf, unsigned int pgfrom, size_t len)
+{
+ struct kvec *tail = buf->tail;
+ unsigned int tailbuf_len;
+ unsigned int result = 0;
+ size_t copy;
+
+ tailbuf_len = buf->buflen - buf->head->iov_len - buf->page_len;
+
+ /* Shift the tail first */
+ if (tailbuf_len != 0) {
+ unsigned int free_space = tailbuf_len - tail->iov_len;
+
+ if (len < free_space)
+ free_space = len;
+ if (len > free_space)
+ len = free_space;
+
+ tail->iov_len += free_space;
+ copy = len;
+
+ if (tail->iov_len > len) {
+ char *p = (char *)tail->iov_base + len;
+ memmove(p, tail->iov_base, tail->iov_len - free_space);
+ result += tail->iov_len - free_space;
+ } else
+ copy = tail->iov_len;
+
+ /* Copy from the inlined pages into the tail */
+ _copy_from_pages((char *)tail->iov_base,
+ buf->pages,
+ buf->page_base + pgfrom,
+ copy);
+ result += copy;
+ }
+
+ return result;
+}
+
/**
* _copy_to_pages
* @pages: array of pages
@@ -351,6 +476,38 @@ _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len)
EXPORT_SYMBOL_GPL(_copy_from_pages);
/**
+ * _zero_pages
+ * @pages: array of pages
+ * @pgbase: beginning page vector address
+ * @len: length
+ */
+static void
+_zero_pages(struct page **pages, size_t pgbase, size_t len)
+{
+ struct page **page;
+ char *vpage;
+ size_t zero;
+
+ page = pages + (pgbase >> PAGE_SHIFT);
+ pgbase &= ~PAGE_MASK;
+
+ do {
+ zero = PAGE_SIZE - pgbase;
+ if (zero > len)
+ zero = len;
+
+ vpage = kmap_atomic(*page);
+ memset(vpage + pgbase, 0, zero);
+ kunmap_atomic(vpage);
+
+ flush_dcache_page(*page);
+ pgbase = 0;
+ page++;
+
+ } while ((len -= zero) != 0);
+}
+
+/**
* xdr_shrink_bufhead
* @buf: xdr_buf
* @len: bytes to remove from buf->head[0]
@@ -446,39 +603,13 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
static unsigned int
xdr_shrink_pagelen(struct xdr_buf *buf, size_t len)
{
- struct kvec *tail;
- size_t copy;
unsigned int pglen = buf->page_len;
- unsigned int tailbuf_len;
unsigned int result;
- result = 0;
- tail = buf->tail;
if (len > buf->page_len)
len = buf-> page_len;
- tailbuf_len = buf->buflen - buf->head->iov_len - buf->page_len;
-
- /* Shift the tail first */
- if (tailbuf_len != 0) {
- unsigned int free_space = tailbuf_len - tail->iov_len;
- if (len < free_space)
- free_space = len;
- tail->iov_len += free_space;
-
- copy = len;
- if (tail->iov_len > len) {
- char *p = (char *)tail->iov_base + len;
- memmove(p, tail->iov_base, tail->iov_len - len);
- result += tail->iov_len - len;
- } else
- copy = tail->iov_len;
- /* Copy from the inlined pages into the tail */
- _copy_from_pages((char *)tail->iov_base,
- buf->pages, buf->page_base + pglen - len,
- copy);
- result += copy;
- }
+ result = _shift_data_right_tail(buf, pglen - len, len);
buf->page_len -= len;
buf->buflen -= len;
/* Have we truncated the message? */
@@ -506,6 +637,19 @@ unsigned int xdr_stream_pos(const struct xdr_stream *xdr)
EXPORT_SYMBOL_GPL(xdr_stream_pos);
/**
+ * xdr_page_pos - Return the current offset from the start of the xdr pages
+ * @xdr: pointer to struct xdr_stream
+ */
+unsigned int xdr_page_pos(const struct xdr_stream *xdr)
+{
+ unsigned int pos = xdr_stream_pos(xdr);
+
+ WARN_ON(pos < xdr->buf->head[0].iov_len);
+ return pos - xdr->buf->head[0].iov_len;
+}
+EXPORT_SYMBOL_GPL(xdr_page_pos);
+
+/**
* xdr_init_encode - Initialize a struct xdr_stream for sending data.
* @xdr: pointer to xdr_stream struct
* @buf: pointer to XDR buffer in which to encode data
@@ -825,6 +969,13 @@ static int xdr_set_page_base(struct xdr_stream *xdr,
return 0;
}
+static void xdr_set_page(struct xdr_stream *xdr, unsigned int base,
+ unsigned int len)
+{
+ if (xdr_set_page_base(xdr, base, len) < 0)
+ xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2);
+}
+
static void xdr_set_next_page(struct xdr_stream *xdr)
{
unsigned int newbase;
@@ -832,8 +983,7 @@ static void xdr_set_next_page(struct xdr_stream *xdr)
newbase = (1 + xdr->page_ptr - xdr->buf->pages) << PAGE_SHIFT;
newbase -= xdr->buf->page_base;
- if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0)
- xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2);
+ xdr_set_page(xdr, newbase, PAGE_SIZE);
}
static bool xdr_set_next_buffer(struct xdr_stream *xdr)
@@ -841,8 +991,7 @@ static bool xdr_set_next_buffer(struct xdr_stream *xdr)
if (xdr->page_ptr != NULL)
xdr_set_next_page(xdr);
else if (xdr->iov == xdr->buf->head) {
- if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0)
- xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2);
+ xdr_set_page(xdr, 0, PAGE_SIZE);
}
return xdr->p != xdr->end;
}
@@ -979,26 +1128,33 @@ out_overflow:
}
EXPORT_SYMBOL_GPL(xdr_inline_decode);
-static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len)
+static void xdr_realign_pages(struct xdr_stream *xdr)
{
struct xdr_buf *buf = xdr->buf;
- struct kvec *iov;
- unsigned int nwords = XDR_QUADLEN(len);
+ struct kvec *iov = buf->head;
unsigned int cur = xdr_stream_pos(xdr);
unsigned int copied, offset;
- if (xdr->nwords == 0)
- return 0;
-
/* Realign pages to current pointer position */
- iov = buf->head;
if (iov->iov_len > cur) {
offset = iov->iov_len - cur;
copied = xdr_shrink_bufhead(buf, offset);
trace_rpc_xdr_alignment(xdr, offset, copied);
xdr->nwords = XDR_QUADLEN(buf->len - cur);
}
+}
+static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len)
+{
+ struct xdr_buf *buf = xdr->buf;
+ unsigned int nwords = XDR_QUADLEN(len);
+ unsigned int cur = xdr_stream_pos(xdr);
+ unsigned int copied, offset;
+
+ if (xdr->nwords == 0)
+ return 0;
+
+ xdr_realign_pages(xdr);
if (nwords > xdr->nwords) {
nwords = xdr->nwords;
len = nwords << 2;
@@ -1057,6 +1213,79 @@ unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len)
}
EXPORT_SYMBOL_GPL(xdr_read_pages);
+uint64_t xdr_align_data(struct xdr_stream *xdr, uint64_t offset, uint32_t length)
+{
+ struct xdr_buf *buf = xdr->buf;
+ unsigned int from, bytes;
+ unsigned int shift = 0;
+
+ if ((offset + length) < offset ||
+ (offset + length) > buf->page_len)
+ length = buf->page_len - offset;
+
+ xdr_realign_pages(xdr);
+ from = xdr_page_pos(xdr);
+ bytes = xdr->nwords << 2;
+ if (length < bytes)
+ bytes = length;
+
+ /* Move page data to the left */
+ if (from > offset) {
+ shift = min_t(unsigned int, bytes, buf->page_len - from);
+ _shift_data_left_pages(buf->pages,
+ buf->page_base + offset,
+ buf->page_base + from,
+ shift);
+ bytes -= shift;
+
+ /* Move tail data into the pages, if necessary */
+ if (bytes > 0)
+ _shift_data_left_tail(buf, offset + shift, bytes);
+ }
+
+ xdr->nwords -= XDR_QUADLEN(length);
+ xdr_set_page(xdr, from + length, PAGE_SIZE);
+ return length;
+}
+EXPORT_SYMBOL_GPL(xdr_align_data);
+
+uint64_t xdr_expand_hole(struct xdr_stream *xdr, uint64_t offset, uint64_t length)
+{
+ struct xdr_buf *buf = xdr->buf;
+ unsigned int bytes;
+ unsigned int from;
+ unsigned int truncated = 0;
+
+ if ((offset + length) < offset ||
+ (offset + length) > buf->page_len)
+ length = buf->page_len - offset;
+
+ xdr_realign_pages(xdr);
+ from = xdr_page_pos(xdr);
+ bytes = xdr->nwords << 2;
+
+ if (offset + length + bytes > buf->page_len) {
+ unsigned int shift = (offset + length + bytes) - buf->page_len;
+ unsigned int res = _shift_data_right_tail(buf, from + bytes - shift, shift);
+ truncated = shift - res;
+ xdr->nwords -= XDR_QUADLEN(truncated);
+ bytes -= shift;
+ }
+
+ /* Now move the page data over and zero pages */
+ if (bytes > 0)
+ _shift_data_right_pages(buf->pages,
+ buf->page_base + offset + length,
+ buf->page_base + from,
+ bytes);
+ _zero_pages(buf->pages, buf->page_base + offset, length);
+
+ buf->len += length - (from - offset) - truncated;
+ xdr_set_page(xdr, offset + length, PAGE_SIZE);
+ return length;
+}
+EXPORT_SYMBOL_GPL(xdr_expand_hole);
+
/**
* xdr_enter_page - decode data from the XDR page
* @xdr: pointer to xdr_stream struct
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 5a8e47bbfb9f..f6c17e75f20e 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -834,8 +834,7 @@ void xprt_connect(struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
- dprintk("RPC: %5u xprt_connect xprt %p %s connected\n", task->tk_pid,
- xprt, (xprt_connected(xprt) ? "is" : "is not"));
+ trace_xprt_connect(xprt);
if (!xprt_bound(xprt)) {
task->tk_status = -EAGAIN;
@@ -1131,8 +1130,6 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
- trace_xprt_complete_rqst(xprt, req->rq_xid, copied);
-
xprt->stat.recvs++;
req->rq_private_buf.len = copied;
@@ -1269,7 +1266,6 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
/* Note: req is added _before_ pos */
list_add_tail(&req->rq_xmit, &pos->rq_xmit);
INIT_LIST_HEAD(&req->rq_xmit2);
- trace_xprt_enq_xmit(task, 1);
goto out;
}
} else if (RPC_IS_SWAPPER(task)) {
@@ -1281,7 +1277,6 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
/* Note: req is added _before_ pos */
list_add_tail(&req->rq_xmit, &pos->rq_xmit);
INIT_LIST_HEAD(&req->rq_xmit2);
- trace_xprt_enq_xmit(task, 2);
goto out;
}
} else if (!req->rq_seqno) {
@@ -1290,13 +1285,11 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
continue;
list_add_tail(&req->rq_xmit2, &pos->rq_xmit2);
INIT_LIST_HEAD(&req->rq_xmit);
- trace_xprt_enq_xmit(task, 3);
goto out;
}
}
list_add_tail(&req->rq_xmit, &xprt->xmit_queue);
INIT_LIST_HEAD(&req->rq_xmit2);
- trace_xprt_enq_xmit(task, 4);
out:
set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
spin_unlock(&xprt->queue_lock);
@@ -1414,9 +1407,9 @@ bool xprt_prepare_transmit(struct rpc_task *task)
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
- dprintk("RPC: %5u xprt_prepare_transmit\n", task->tk_pid);
-
if (!xprt_lock_write(xprt, task)) {
+ trace_xprt_transmit_queued(xprt, task);
+
/* Race breaker: someone may have transmitted us */
if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
rpc_wake_up_queued_task_set_status(&xprt->sending,
@@ -1520,10 +1513,13 @@ xprt_transmit(struct rpc_task *task)
{
struct rpc_rqst *next, *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
- int status;
+ int counter, status;
spin_lock(&xprt->queue_lock);
+ counter = 0;
while (!list_empty(&xprt->xmit_queue)) {
+ if (++counter == 20)
+ break;
next = list_first_entry(&xprt->xmit_queue,
struct rpc_rqst, rq_xmit);
xprt_pin_rqst(next);
@@ -1531,7 +1527,6 @@ xprt_transmit(struct rpc_task *task)
status = xprt_request_transmit(next, task);
if (status == -EBADMSG && next != req)
status = 0;
- cond_resched();
spin_lock(&xprt->queue_lock);
xprt_unpin_rqst(next);
if (status == 0) {
@@ -1747,8 +1742,8 @@ xprt_request_init(struct rpc_task *task)
req->rq_rcv_buf.bvec = NULL;
req->rq_release_snd_buf = NULL;
xprt_init_majortimeo(task, req);
- dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
- req, ntohl(req->rq_xid));
+
+ trace_xprt_reserve(req);
}
static void
@@ -1838,7 +1833,6 @@ void xprt_release(struct rpc_task *task)
if (req->rq_release_snd_buf)
req->rq_release_snd_buf(req);
- dprintk("RPC: %5u release request %p\n", task->tk_pid, req);
if (likely(!bc_prealloc(req)))
xprt->ops->free_slot(xprt, req);
else
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 7f94c9a19fd3..44888f5badef 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -124,7 +124,7 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
if (IS_ERR(frmr))
goto out_mr_err;
- sg = kcalloc(depth, sizeof(*sg), GFP_NOFS);
+ sg = kmalloc_array(depth, sizeof(*sg), GFP_NOFS);
if (!sg)
goto out_list_err;
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 053c8ab1265a..8915e42240d3 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -413,9 +413,6 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]);
snprintf(buf, sizeof(buf), "%4hx", port);
xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
-
- trace_xprtrdma_op_setport(container_of(xprt, struct rpcrdma_xprt,
- rx_xprt));
}
/**
@@ -586,11 +583,9 @@ xprt_rdma_allocate(struct rpc_task *task)
rqst->rq_buffer = rdmab_data(req->rl_sendbuf);
rqst->rq_rbuffer = rdmab_data(req->rl_recvbuf);
- trace_xprtrdma_op_allocate(task, req);
return 0;
out_fail:
- trace_xprtrdma_op_allocate(task, NULL);
return -ENOMEM;
}
@@ -607,8 +602,6 @@ xprt_rdma_free(struct rpc_task *task)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
- trace_xprtrdma_op_free(task, req);
-
if (!list_empty(&req->rl_registered))
frwr_unmap_sync(r_xprt, req);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 554e1bb4c1c7..7090bbee0ec5 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -762,10 +762,7 @@ static int xs_nospace(struct rpc_rqst *req)
struct sock *sk = transport->inet;
int ret = -EAGAIN;
- dprintk("RPC: %5u xmit incomplete (%u left of %u)\n",
- req->rq_task->tk_pid,
- req->rq_slen - transport->xmit.offset,
- req->rq_slen);
+ trace_rpc_socket_nospace(req, transport);
/* Protect against races with write_space */
spin_lock(&xprt->transport_lock);
diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan
index 27348029b2b8..4e3fff0745e8 100644
--- a/scripts/Makefile.ubsan
+++ b/scripts/Makefile.ubsan
@@ -4,7 +4,15 @@ ifdef CONFIG_UBSAN_ALIGNMENT
endif
ifdef CONFIG_UBSAN_BOUNDS
- CFLAGS_UBSAN += $(call cc-option, -fsanitize=bounds)
+ ifdef CONFIG_CC_IS_CLANG
+ CFLAGS_UBSAN += -fsanitize=array-bounds
+ else
+ CFLAGS_UBSAN += $(call cc-option, -fsanitize=bounds)
+ endif
+endif
+
+ifdef CONFIG_UBSAN_LOCAL_BOUNDS
+ CFLAGS_UBSAN += -fsanitize=local-bounds
endif
ifdef CONFIG_UBSAN_MISC
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 504d2e431c60..4223a9ac7059 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -65,6 +65,7 @@ my $allow_c99_comments = 1; # Can be overridden by --ignore C99_COMMENT_TOLERANC
# git output parsing needs US English output, so first set backtick child process LANGUAGE
my $git_command ='export LANGUAGE=en_US.UTF-8; git';
my $tabsize = 8;
+my ${CONFIG_} = "CONFIG_";
sub help {
my ($exitcode) = @_;
@@ -127,6 +128,8 @@ Options:
--typedefsfile Read additional types from this file
--color[=WHEN] Use colors 'always', 'never', or only when output
is a terminal ('auto'). Default is 'auto'.
+ --kconfig-prefix=WORD use WORD as a prefix for Kconfig symbols (default
+ ${CONFIG_})
-h, --help, --version display this help and exit
When FILE is - read standard input.
@@ -235,6 +238,7 @@ GetOptions(
'color=s' => \$color,
'no-color' => \$color, #keep old behaviors of -nocolor
'nocolor' => \$color, #keep old behaviors of -nocolor
+ 'kconfig-prefix=s' => \${CONFIG_},
'h|help' => \$help,
'version' => \$help
) or help(1);
@@ -970,6 +974,16 @@ sub seed_camelcase_includes {
}
}
+sub git_is_single_file {
+ my ($filename) = @_;
+
+ return 0 if ((which("git") eq "") || !(-e "$gitroot"));
+
+ my $output = `${git_command} ls-files -- $filename 2>/dev/null`;
+ my $count = $output =~ tr/\n//;
+ return $count eq 1 && $output =~ m{^${filename}$};
+}
+
sub git_commit_info {
my ($commit, $id, $desc) = @_;
@@ -1043,6 +1057,9 @@ my $vname;
$allow_c99_comments = !defined $ignore_type{"C99_COMMENT_TOLERANCE"};
for my $filename (@ARGV) {
my $FILE;
+ my $is_git_file = git_is_single_file($filename);
+ my $oldfile = $file;
+ $file = 1 if ($is_git_file);
if ($git) {
open($FILE, '-|', "git format-patch -M --stdout -1 $filename") ||
die "$P: $filename: git format-patch failed - $!\n";
@@ -1087,6 +1104,7 @@ for my $filename (@ARGV) {
@modifierListFile = ();
@typeListFile = ();
build_types();
+ $file = $oldfile if ($is_git_file);
}
if (!$quiet) {
@@ -1163,10 +1181,10 @@ sub parse_email {
}
}
+ $comment = trim($comment);
$name = trim($name);
$name =~ s/^\"|\"$//g;
- $name =~ s/(\s*\([^\)]+\))\s*//;
- if (defined($1)) {
+ if ($name =~ s/(\s*\([^\)]+\))\s*//) {
$name_comment = trim($1);
}
$address = trim($address);
@@ -1181,10 +1199,12 @@ sub parse_email {
}
sub format_email {
- my ($name, $address) = @_;
+ my ($name, $name_comment, $address, $comment) = @_;
my $formatted_email;
+ $name_comment = trim($name_comment);
+ $comment = trim($comment);
$name = trim($name);
$name =~ s/^\"|\"$//g;
$address = trim($address);
@@ -1197,9 +1217,9 @@ sub format_email {
if ("$name" eq "") {
$formatted_email = "$address";
} else {
- $formatted_email = "$name <$address>";
+ $formatted_email = "$name$name_comment <$address>";
}
-
+ $formatted_email .= "$comment";
return $formatted_email;
}
@@ -1207,17 +1227,23 @@ sub reformat_email {
my ($email) = @_;
my ($email_name, $name_comment, $email_address, $comment) = parse_email($email);
- return format_email($email_name, $email_address);
+ return format_email($email_name, $name_comment, $email_address, $comment);
}
sub same_email_addresses {
- my ($email1, $email2) = @_;
+ my ($email1, $email2, $match_comment) = @_;
my ($email1_name, $name1_comment, $email1_address, $comment1) = parse_email($email1);
my ($email2_name, $name2_comment, $email2_address, $comment2) = parse_email($email2);
+ if ($match_comment != 1) {
+ return $email1_name eq $email2_name &&
+ $email1_address eq $email2_address;
+ }
return $email1_name eq $email2_name &&
- $email1_address eq $email2_address;
+ $email1_address eq $email2_address &&
+ $name1_comment eq $name2_comment &&
+ $comment1 eq $comment2;
}
sub which {
@@ -2347,6 +2373,7 @@ sub process {
my $signoff = 0;
my $author = '';
my $authorsignoff = 0;
+ my $author_sob = '';
my $is_patch = 0;
my $is_binding_patch = -1;
my $in_header_lines = $file ? 0 : 1;
@@ -2661,6 +2688,10 @@ sub process {
# Check the patch for a From:
if (decode("MIME-Header", $line) =~ /^From:\s*(.*)/) {
$author = $1;
+ my $curline = $linenr;
+ while(defined($rawlines[$curline]) && ($rawlines[$curline++] =~ /^[ \t]\s*(.*)/)) {
+ $author .= $1;
+ }
$author = encode("utf8", $author) if ($line =~ /=\?utf-8\?/i);
$author =~ s/"//g;
$author = reformat_email($author);
@@ -2670,9 +2701,37 @@ sub process {
if ($line =~ /^\s*signed-off-by:\s*(.*)/i) {
$signoff++;
$in_commit_log = 0;
- if ($author ne '') {
- if (same_email_addresses($1, $author)) {
+ if ($author ne '' && $authorsignoff != 1) {
+ if (same_email_addresses($1, $author, 1)) {
$authorsignoff = 1;
+ } else {
+ my $ctx = $1;
+ my ($email_name, $email_comment, $email_address, $comment1) = parse_email($ctx);
+ my ($author_name, $author_comment, $author_address, $comment2) = parse_email($author);
+
+ if ($email_address eq $author_address && $email_name eq $author_name) {
+ $author_sob = $ctx;
+ $authorsignoff = 2;
+ } elsif ($email_address eq $author_address) {
+ $author_sob = $ctx;
+ $authorsignoff = 3;
+ } elsif ($email_name eq $author_name) {
+ $author_sob = $ctx;
+ $authorsignoff = 4;
+
+ my $address1 = $email_address;
+ my $address2 = $author_address;
+
+ if ($address1 =~ /(\S+)\+\S+(\@.*)/) {
+ $address1 = "$1$2";
+ }
+ if ($address2 =~ /(\S+)\+\S+(\@.*)/) {
+ $address2 = "$1$2";
+ }
+ if ($address1 eq $address2) {
+ $authorsignoff = 5;
+ }
+ }
}
}
}
@@ -2729,7 +2788,7 @@ sub process {
}
my ($email_name, $name_comment, $email_address, $comment) = parse_email($email);
- my $suggested_email = format_email(($email_name, $email_address));
+ my $suggested_email = format_email(($email_name, $name_comment, $email_address, $comment));
if ($suggested_email eq "") {
ERROR("BAD_SIGN_OFF",
"Unrecognized email address: '$email'\n" . $herecurr);
@@ -2739,9 +2798,9 @@ sub process {
$dequoted =~ s/" </ </;
# Don't force email to have quotes
# Allow just an angle bracketed address
- if (!same_email_addresses($email, $suggested_email)) {
+ if (!same_email_addresses($email, $suggested_email, 0)) {
WARN("BAD_SIGN_OFF",
- "email address '$email' might be better as '$suggested_email$comment'\n" . $herecurr);
+ "email address '$email' might be better as '$suggested_email'\n" . $herecurr);
}
}
@@ -2987,6 +3046,42 @@ sub process {
}
}
+# check for repeated words separated by a single space
+ if ($rawline =~ /^\+/ || $in_commit_log) {
+ while ($rawline =~ /\b($word_pattern) (?=($word_pattern))/g) {
+
+ my $first = $1;
+ my $second = $2;
+
+ if ($first =~ /(?:struct|union|enum)/) {
+ pos($rawline) += length($first) + length($second) + 1;
+ next;
+ }
+
+ next if ($first ne $second);
+ next if ($first eq 'long');
+
+ if (WARN("REPEATED_WORD",
+ "Possible repeated word: '$first'\n" . $herecurr) &&
+ $fix) {
+ $fixed[$fixlinenr] =~ s/\b$first $second\b/$first/;
+ }
+ }
+
+ # if it's a repeated word on consecutive lines in a comment block
+ if ($prevline =~ /$;+\s*$/ &&
+ $prevrawline =~ /($word_pattern)\s*$/) {
+ my $last_word = $1;
+ if ($rawline =~ /^\+\s*\*\s*$last_word /) {
+ if (WARN("REPEATED_WORD",
+ "Possible repeated word: '$last_word'\n" . $hereprev) &&
+ $fix) {
+ $fixed[$fixlinenr] =~ s/(\+\s*\*\s*)$last_word /$1/;
+ }
+ }
+ }
+ }
+
# ignore non-hunk lines and lines being removed
next if (!$hunk_line || $line =~ /^-/);
@@ -3213,6 +3308,12 @@ sub process {
}
}
+# check for embedded filenames
+ if ($rawline =~ /^\+.*\Q$realfile\E/) {
+ WARN("EMBEDDED_FILENAME",
+ "It's generally not useful to have the filename in the file\n" . $herecurr);
+ }
+
# check we are in a valid source file if not then ignore this hunk
next if ($realfile !~ /\.(h|c|s|S|sh|dtsi|dts)$/);
@@ -3310,42 +3411,6 @@ sub process {
}
}
-# check for repeated words separated by a single space
- if ($rawline =~ /^\+/) {
- while ($rawline =~ /\b($word_pattern) (?=($word_pattern))/g) {
-
- my $first = $1;
- my $second = $2;
-
- if ($first =~ /(?:struct|union|enum)/) {
- pos($rawline) += length($first) + length($second) + 1;
- next;
- }
-
- next if ($first ne $second);
- next if ($first eq 'long');
-
- if (WARN("REPEATED_WORD",
- "Possible repeated word: '$first'\n" . $herecurr) &&
- $fix) {
- $fixed[$fixlinenr] =~ s/\b$first $second\b/$first/;
- }
- }
-
- # if it's a repeated word on consecutive lines in a comment block
- if ($prevline =~ /$;+\s*$/ &&
- $prevrawline =~ /($word_pattern)\s*$/) {
- my $last_word = $1;
- if ($rawline =~ /^\+\s*\*\s*$last_word /) {
- if (WARN("REPEATED_WORD",
- "Possible repeated word: '$last_word'\n" . $hereprev) &&
- $fix) {
- $fixed[$fixlinenr] =~ s/(\+\s*\*\s*)$last_word /$1/;
- }
- }
- }
- }
-
# check for space before tabs.
if ($rawline =~ /^\+/ && $rawline =~ / \t/) {
my $herevet = "$here\n" . cat_vet($rawline) . "\n";
@@ -3436,7 +3501,7 @@ sub process {
if ($realfile =~ m@^(drivers/net/|net/)@ &&
$prevrawline =~ /^\+[ \t]*\/\*[ \t]*$/ &&
$rawline =~ /^\+[ \t]*\*/ &&
- $realline > 2) {
+ $realline > 3) { # Do not warn about the initial copyright comment block after SPDX-License-Identifier
WARN("NETWORKING_BLOCK_COMMENT_STYLE",
"networking block comments don't use an empty /* line, use /* Comment...\n" . $hereprev);
}
@@ -3895,6 +3960,17 @@ sub process {
#ignore lines not being added
next if ($line =~ /^[^\+]/);
+# check for self assignments used to avoid compiler warnings
+# e.g.: int foo = foo, *bar = NULL;
+# struct foo bar = *(&(bar));
+ if ($line =~ /^\+\s*(?:$Declare)?([A-Za-z_][A-Za-z\d_]*)\s*=/) {
+ my $var = $1;
+ if ($line =~ /^\+\s*(?:$Declare)?$var\s*=\s*(?:$var|\*\s*\(?\s*&\s*\(?\s*$var\s*\)?\s*\)?)\s*[;,]/) {
+ WARN("SELF_ASSIGNMENT",
+ "Do not use self-assignments to avoid compiler warnings\n" . $herecurr);
+ }
+ }
+
# check for dereferences that span multiple lines
if ($prevline =~ /^\+.*$Lval\s*(?:\.|->)\s*$/ &&
$line =~ /^\+\s*(?!\#\s*(?!define\s+|if))\s*$Lval/) {
@@ -4270,6 +4346,12 @@ sub process {
"Prefer dev_$level(... to dev_printk(KERN_$orig, ...\n" . $herecurr);
}
+# trace_printk should not be used in production code.
+ if ($line =~ /\b(trace_printk|trace_puts|ftrace_vprintk)\s*\(/) {
+ WARN("TRACE_PRINTK",
+ "Do not use $1() in production code (this can be ignored if built only with a debug config option)\n" . $herecurr);
+ }
+
# ENOSYS means "bad syscall nr" and nothing else. This will have a small
# number of false positives, but assembly files are not checked, so at
# least the arch entry code will not trigger this warning.
@@ -4936,6 +5018,17 @@ sub process {
}
}
+# check if a statement with a comma should be two statements like:
+# foo = bar(), /* comma should be semicolon */
+# bar = baz();
+ if (defined($stat) &&
+ $stat =~ /^\+\s*(?:$Lval\s*$Assignment\s*)?$FuncArg\s*,\s*(?:$Lval\s*$Assignment\s*)?$FuncArg\s*;\s*$/) {
+ my $cnt = statement_rawlines($stat);
+ my $herectx = get_stat_here($linenr, $cnt, $here);
+ WARN("SUSPECT_COMMA_SEMICOLON",
+ "Possible comma where semicolon could be used\n" . $herectx);
+ }
+
# return is not a function
if (defined($stat) && $stat =~ /^.\s*return(\s*)\(/s) {
my $spacing = $1;
@@ -5295,9 +5388,9 @@ sub process {
$dstat =~ s/\s*$//s;
# Flatten any parentheses and braces
- while ($dstat =~ s/\([^\(\)]*\)/1/ ||
- $dstat =~ s/\{[^\{\}]*\}/1/ ||
- $dstat =~ s/.\[[^\[\]]*\]/1/)
+ while ($dstat =~ s/\([^\(\)]*\)/1u/ ||
+ $dstat =~ s/\{[^\{\}]*\}/1u/ ||
+ $dstat =~ s/.\[[^\[\]]*\]/1u/)
{
}
@@ -5338,6 +5431,7 @@ sub process {
$dstat !~ /^\.$Ident\s*=/ && # .foo =
$dstat !~ /^(?:\#\s*$Ident|\#\s*$Constant)\s*$/ && # stringification #foo
$dstat !~ /^do\s*$Constant\s*while\s*$Constant;?$/ && # do {...} while (...); // do {...} while (...)
+ $dstat !~ /^while\s*$Constant\s*$Constant\s*$/ && # while (...) {...}
$dstat !~ /^for\s*$Constant$/ && # for (...)
$dstat !~ /^for\s*$Constant\s+(?:$Ident|-?$Constant)$/ && # for (...) bar()
$dstat !~ /^do\s*{/ && # do {...
@@ -6524,16 +6618,16 @@ sub process {
}
# check for IS_ENABLED() without CONFIG_<FOO> ($rawline for comments too)
- if ($rawline =~ /\bIS_ENABLED\s*\(\s*(\w+)\s*\)/ && $1 !~ /^CONFIG_/) {
+ if ($rawline =~ /\bIS_ENABLED\s*\(\s*(\w+)\s*\)/ && $1 !~ /^${CONFIG_}/) {
WARN("IS_ENABLED_CONFIG",
- "IS_ENABLED($1) is normally used as IS_ENABLED(CONFIG_$1)\n" . $herecurr);
+ "IS_ENABLED($1) is normally used as IS_ENABLED(${CONFIG_}$1)\n" . $herecurr);
}
# check for #if defined CONFIG_<FOO> || defined CONFIG_<FOO>_MODULE
- if ($line =~ /^\+\s*#\s*if\s+defined(?:\s*\(?\s*|\s+)(CONFIG_[A-Z_]+)\s*\)?\s*\|\|\s*defined(?:\s*\(?\s*|\s+)\1_MODULE\s*\)?\s*$/) {
+ if ($line =~ /^\+\s*#\s*if\s+defined(?:\s*\(?\s*|\s+)(${CONFIG_}[A-Z_]+)\s*\)?\s*\|\|\s*defined(?:\s*\(?\s*|\s+)\1_MODULE\s*\)?\s*$/) {
my $config = $1;
if (WARN("PREFER_IS_ENABLED",
- "Prefer IS_ENABLED(<FOO>) to CONFIG_<FOO> || CONFIG_<FOO>_MODULE\n" . $herecurr) &&
+ "Prefer IS_ENABLED(<FOO>) to ${CONFIG_}<FOO> || ${CONFIG_}<FOO>_MODULE\n" . $herecurr) &&
$fix) {
$fixed[$fixlinenr] = "\+#if IS_ENABLED($config)";
}
@@ -6886,9 +6980,33 @@ sub process {
if ($signoff == 0) {
ERROR("MISSING_SIGN_OFF",
"Missing Signed-off-by: line(s)\n");
- } elsif (!$authorsignoff) {
- WARN("NO_AUTHOR_SIGN_OFF",
- "Missing Signed-off-by: line by nominal patch author '$author'\n");
+ } elsif ($authorsignoff != 1) {
+ # authorsignoff values:
+ # 0 -> missing sign off
+ # 1 -> sign off identical
+ # 2 -> names and addresses match, comments mismatch
+ # 3 -> addresses match, names different
+ # 4 -> names match, addresses different
+ # 5 -> names match, addresses excluding subaddress details (refer RFC 5233) match
+
+ my $sob_msg = "'From: $author' != 'Signed-off-by: $author_sob'";
+
+ if ($authorsignoff == 0) {
+ ERROR("NO_AUTHOR_SIGN_OFF",
+ "Missing Signed-off-by: line by nominal patch author '$author'\n");
+ } elsif ($authorsignoff == 2) {
+ CHK("FROM_SIGN_OFF_MISMATCH",
+ "From:/Signed-off-by: email comments mismatch: $sob_msg\n");
+ } elsif ($authorsignoff == 3) {
+ WARN("FROM_SIGN_OFF_MISMATCH",
+ "From:/Signed-off-by: email name mismatch: $sob_msg\n");
+ } elsif ($authorsignoff == 4) {
+ WARN("FROM_SIGN_OFF_MISMATCH",
+ "From:/Signed-off-by: email address mismatch: $sob_msg\n");
+ } elsif ($authorsignoff == 5) {
+ WARN("FROM_SIGN_OFF_MISMATCH",
+ "From:/Signed-off-by: email subaddress mismatch: $sob_msg\n");
+ }
}
}
diff --git a/scripts/coccicheck b/scripts/coccicheck
index e04d328210ac..209bb0427b43 100755
--- a/scripts/coccicheck
+++ b/scripts/coccicheck
@@ -75,8 +75,13 @@ else
OPTIONS="--dir $KBUILD_EXTMOD $COCCIINCLUDE"
fi
+ # Use only one thread per core by default if hyperthreading is enabled
+ THREADS_PER_CORE=$(lscpu | grep "Thread(s) per core: " | tr -cd "[:digit:]")
if [ -z "$J" ]; then
NPROC=$(getconf _NPROCESSORS_ONLN)
+ if [ $THREADS_PER_CORE -gt 1 -a $NPROC -gt 4 ] ; then
+ NPROC=$((NPROC/2))
+ fi
else
NPROC="$J"
fi
@@ -99,7 +104,7 @@ fi
if [ "$MODE" = "" ] ; then
if [ "$ONLINE" = "0" ] ; then
echo 'You have not explicitly specified the mode to use. Using default "report" mode.'
- echo 'Available modes are the following: patch, report, context, org'
+ echo 'Available modes are the following: patch, report, context, org, chain'
echo 'You can specify the mode with "make coccicheck MODE=<mode>"'
echo 'Note however that some modes are not implemented by some semantic patches.'
fi
@@ -126,8 +131,14 @@ run_cmd_parmap() {
if [ $VERBOSE -ne 0 ] ; then
echo "Running ($NPROC in parallel): $@"
fi
- echo $@ >>$DEBUG_FILE
- $@ 2>>$DEBUG_FILE
+ if [ "$DEBUG_FILE" != "/dev/null" -a "$DEBUG_FILE" != "" ]; then
+ echo $@>>$DEBUG_FILE
+ $@ 2>>$DEBUG_FILE
+ else
+ echo $@
+ $@ 2>&1
+ fi
+
err=$?
if [[ $err -ne 0 ]]; then
echo "coccicheck failed"
diff --git a/scripts/coccinelle/api/alloc/zalloc-simple.cocci b/scripts/coccinelle/api/alloc/zalloc-simple.cocci
index 26cda3f48f01..b3d0c3c230c1 100644
--- a/scripts/coccinelle/api/alloc/zalloc-simple.cocci
+++ b/scripts/coccinelle/api/alloc/zalloc-simple.cocci
@@ -127,6 +127,16 @@ statement S;
if ((x==NULL) || ...) S
- memset((T2)x,0,E1);
+@depends on patch@
+type T, T2;
+expression x;
+expression E1,E2,E3,E4;
+statement S;
+@@
+ x = (T)dma_alloc_coherent(E1, E2, E3, E4);
+ if ((x==NULL) || ...) S
+- memset((T2)x, 0, E2);
+
//----------------------------------------------------------
// For org mode
//----------------------------------------------------------
@@ -199,9 +209,9 @@ statement S;
position p;
@@
- x = (T)dma_alloc_coherent@p(E2,E1,E3,E4);
+ x = (T)dma_alloc_coherent@p(E1,E2,E3,E4);
if ((x==NULL) || ...) S
- memset((T2)x,0,E1);
+ memset((T2)x,0,E2);
@script:python depends on org@
p << r2.p;
@@ -217,7 +227,7 @@ p << r2.p;
x << r2.x;
@@
-msg="WARNING: dma_alloc_coherent use in %s already zeroes out memory, so memset is not needed" % (x)
+msg="WARNING: dma_alloc_coherent used in %s already zeroes out memory, so memset is not needed" % (x)
coccilib.report.print_report(p[0], msg)
//-----------------------------------------------------------------
diff --git a/scripts/coccinelle/api/kfree_mismatch.cocci b/scripts/coccinelle/api/kfree_mismatch.cocci
new file mode 100644
index 000000000000..d46a9b3eb7b3
--- /dev/null
+++ b/scripts/coccinelle/api/kfree_mismatch.cocci
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0-only
+///
+/// Check that kvmalloc'ed memory is freed by kfree functions,
+/// vmalloc'ed by vfree functions and kvmalloc'ed by kvfree
+/// functions.
+///
+// Confidence: High
+// Copyright: (C) 2020 Denis Efremov ISPRAS
+// Options: --no-includes --include-headers
+//
+
+virtual patch
+virtual report
+virtual org
+virtual context
+
+@alloc@
+expression E, E1;
+position kok, vok;
+@@
+
+(
+ if (...) {
+ ...
+ E = \(kmalloc\|kzalloc\|krealloc\|kcalloc\|
+ kmalloc_node\|kzalloc_node\|kmalloc_array\|
+ kmalloc_array_node\|kcalloc_node\)(...)@kok
+ ...
+ } else {
+ ...
+ E = \(vmalloc\|vzalloc\|vmalloc_user\|vmalloc_node\|
+ vzalloc_node\|vmalloc_exec\|vmalloc_32\|
+ vmalloc_32_user\|__vmalloc\|__vmalloc_node_range\|
+ __vmalloc_node\)(...)@vok
+ ...
+ }
+|
+ E = \(kmalloc\|kzalloc\|krealloc\|kcalloc\|kmalloc_node\|kzalloc_node\|
+ kmalloc_array\|kmalloc_array_node\|kcalloc_node\)(...)@kok
+ ... when != E = E1
+ when any
+ if (E == NULL) {
+ ...
+ E = \(vmalloc\|vzalloc\|vmalloc_user\|vmalloc_node\|
+ vzalloc_node\|vmalloc_exec\|vmalloc_32\|
+ vmalloc_32_user\|__vmalloc\|__vmalloc_node_range\|
+ __vmalloc_node\)(...)@vok
+ ...
+ }
+)
+
+@free@
+expression E;
+position fok;
+@@
+
+ E = \(kvmalloc\|kvzalloc\|kvcalloc\|kvzalloc_node\|kvmalloc_node\|
+ kvmalloc_array\)(...)
+ ...
+ kvfree(E)@fok
+
+@vfree depends on !patch@
+expression E;
+position a != alloc.kok;
+position f != free.fok;
+@@
+
+* E = \(kmalloc\|kzalloc\|krealloc\|kcalloc\|kmalloc_node\|
+* kzalloc_node\|kmalloc_array\|kmalloc_array_node\|
+* kcalloc_node\)(...)@a
+ ... when != if (...) { ... E = \(vmalloc\|vzalloc\|vmalloc_user\|vmalloc_node\|vzalloc_node\|vmalloc_exec\|vmalloc_32\|vmalloc_32_user\|__vmalloc\|__vmalloc_node_range\|__vmalloc_node\)(...); ... }
+ when != is_vmalloc_addr(E)
+ when any
+* \(vfree\|vfree_atomic\|kvfree\)(E)@f
+
+@depends on patch exists@
+expression E;
+position a != alloc.kok;
+position f != free.fok;
+@@
+
+ E = \(kmalloc\|kzalloc\|krealloc\|kcalloc\|kmalloc_node\|
+ kzalloc_node\|kmalloc_array\|kmalloc_array_node\|
+ kcalloc_node\)(...)@a
+ ... when != if (...) { ... E = \(vmalloc\|vzalloc\|vmalloc_user\|vmalloc_node\|vzalloc_node\|vmalloc_exec\|vmalloc_32\|vmalloc_32_user\|__vmalloc\|__vmalloc_node_range\|__vmalloc_node\)(...); ... }
+ when != is_vmalloc_addr(E)
+ when any
+- \(vfree\|vfree_atomic\|kvfree\)(E)@f
++ kfree(E)
+
+@kfree depends on !patch@
+expression E;
+position a != alloc.vok;
+position f != free.fok;
+@@
+
+* E = \(vmalloc\|vzalloc\|vmalloc_user\|vmalloc_node\|vzalloc_node\|
+* vmalloc_exec\|vmalloc_32\|vmalloc_32_user\|__vmalloc\|
+* __vmalloc_node_range\|__vmalloc_node\)(...)@a
+ ... when != is_vmalloc_addr(E)
+ when any
+* \(kfree\|kfree_sensitive\|kvfree\)(E)@f
+
+@depends on patch exists@
+expression E;
+position a != alloc.vok;
+position f != free.fok;
+@@
+
+ E = \(vmalloc\|vzalloc\|vmalloc_user\|vmalloc_node\|vzalloc_node\|
+ vmalloc_exec\|vmalloc_32\|vmalloc_32_user\|__vmalloc\|
+ __vmalloc_node_range\|__vmalloc_node\)(...)@a
+ ... when != is_vmalloc_addr(E)
+ when any
+- \(kfree\|kvfree\)(E)@f
++ vfree(E)
+
+@kvfree depends on !patch@
+expression E;
+position a, f;
+@@
+
+* E = \(kvmalloc\|kvzalloc\|kvcalloc\|kvzalloc_node\|kvmalloc_node\|
+* kvmalloc_array\)(...)@a
+ ... when != is_vmalloc_addr(E)
+ when any
+* \(kfree\|kfree_sensitive\|vfree\|vfree_atomic\)(E)@f
+
+@depends on patch exists@
+expression E;
+@@
+
+ E = \(kvmalloc\|kvzalloc\|kvcalloc\|kvzalloc_node\|kvmalloc_node\|
+ kvmalloc_array\)(...)
+ ... when != is_vmalloc_addr(E)
+ when any
+- \(kfree\|vfree\)(E)
++ kvfree(E)
+
+@kvfree_switch depends on !patch@
+expression alloc.E;
+position f;
+@@
+
+ ... when != is_vmalloc_addr(E)
+ when any
+* \(kfree\|kfree_sensitive\|vfree\|vfree_atomic\)(E)@f
+
+@depends on patch exists@
+expression alloc.E;
+position f;
+@@
+
+ ... when != is_vmalloc_addr(E)
+ when any
+(
+- \(kfree\|vfree\)(E)@f
++ kvfree(E)
+|
+- kfree_sensitive(E)@f
++ kvfree_sensitive(E)
+)
+
+@script: python depends on report@
+a << vfree.a;
+f << vfree.f;
+@@
+
+msg = "WARNING kmalloc is used to allocate this memory at line %s" % (a[0].line)
+coccilib.report.print_report(f[0], msg)
+
+@script: python depends on org@
+a << vfree.a;
+f << vfree.f;
+@@
+
+msg = "WARNING kmalloc is used to allocate this memory at line %s" % (a[0].line)
+coccilib.org.print_todo(f[0], msg)
+
+@script: python depends on report@
+a << kfree.a;
+f << kfree.f;
+@@
+
+msg = "WARNING vmalloc is used to allocate this memory at line %s" % (a[0].line)
+coccilib.report.print_report(f[0], msg)
+
+@script: python depends on org@
+a << kfree.a;
+f << kfree.f;
+@@
+
+msg = "WARNING vmalloc is used to allocate this memory at line %s" % (a[0].line)
+coccilib.org.print_todo(f[0], msg)
+
+@script: python depends on report@
+a << kvfree.a;
+f << kvfree.f;
+@@
+
+msg = "WARNING kvmalloc is used to allocate this memory at line %s" % (a[0].line)
+coccilib.report.print_report(f[0], msg)
+
+@script: python depends on org@
+a << kvfree.a;
+f << kvfree.f;
+@@
+
+msg = "WARNING kvmalloc is used to allocate this memory at line %s" % (a[0].line)
+coccilib.org.print_todo(f[0], msg)
+
+@script: python depends on report@
+ka << alloc.kok;
+va << alloc.vok;
+f << kvfree_switch.f;
+@@
+
+msg = "WARNING kmalloc (line %s) && vmalloc (line %s) are used to allocate this memory" % (ka[0].line, va[0].line)
+coccilib.report.print_report(f[0], msg)
+
+@script: python depends on org@
+ka << alloc.kok;
+va << alloc.vok;
+f << kvfree_switch.f;
+@@
+
+msg = "WARNING kmalloc (line %s) && vmalloc (line %s) are used to allocate this memory" % (ka[0].line, va[0].line)
+coccilib.org.print_todo(f[0], msg)
diff --git a/scripts/coccinelle/api/kzfree.cocci b/scripts/coccinelle/api/kfree_sensitive.cocci
index 33625bd7cec9..8d980ebf3223 100644
--- a/scripts/coccinelle/api/kzfree.cocci
+++ b/scripts/coccinelle/api/kfree_sensitive.cocci
@@ -1,13 +1,13 @@
// SPDX-License-Identifier: GPL-2.0-only
///
-/// Use kzfree, kvfree_sensitive rather than memset or
-/// memzero_explicit followed by kfree
+/// Use kfree_sensitive, kvfree_sensitive rather than memset or
+/// memzero_explicit followed by kfree.
///
// Confidence: High
// Copyright: (C) 2020 Denis Efremov ISPRAS
// Options: --no-includes --include-headers
//
-// Keywords: kzfree, kvfree_sensitive
+// Keywords: kfree_sensitive, kvfree_sensitive
//
virtual context
@@ -18,7 +18,8 @@ virtual report
@initialize:python@
@@
# kmalloc_oob_in_memset uses memset to explicitly trigger out-of-bounds access
-filter = frozenset(['kmalloc_oob_in_memset', 'kzfree', 'kvfree_sensitive'])
+filter = frozenset(['kmalloc_oob_in_memset',
+ 'kfree_sensitive', 'kvfree_sensitive'])
def relevant(p):
return not (filter & {el.current_element for el in p})
@@ -56,17 +57,13 @@ type T;
- memzero_explicit@m((T)E, size);
... when != E
when strict
-// TODO: uncomment when kfree_sensitive will be merged.
-// Only this case is commented out because developers
-// may not like patches like this since kzfree uses memset
-// internally (not memzero_explicit).
-//(
-//- kfree(E)@p;
-//+ kfree_sensitive(E);
-//|
+(
+- kfree(E)@p;
++ kfree_sensitive(E);
+|
- \(vfree\|kvfree\)(E)@p;
+ kvfree_sensitive(E, size);
-//)
+)
@rp_memset depends on patch@
expression E, size;
@@ -80,7 +77,7 @@ type T;
when strict
(
- kfree(E)@p;
-+ kzfree(E);
++ kfree_sensitive(E);
|
- \(vfree\|kvfree\)(E)@p;
+ kvfree_sensitive(E, size);
@@ -88,14 +85,16 @@ type T;
@script:python depends on report@
p << r.p;
+m << r.m;
@@
-coccilib.report.print_report(p[0],
- "WARNING: opportunity for kzfree/kvfree_sensitive")
+msg = "WARNING opportunity for kfree_sensitive/kvfree_sensitive (memset at line %s)"
+coccilib.report.print_report(p[0], msg % (m[0].line))
@script:python depends on org@
p << r.p;
+m << r.m;
@@
-coccilib.org.print_todo(p[0],
- "WARNING: opportunity for kzfree/kvfree_sensitive")
+msg = "WARNING opportunity for kfree_sensitive/kvfree_sensitive (memset at line %s)"
+coccilib.org.print_todo(p[0], msg % (m[0].line))
diff --git a/scripts/coccinelle/api/kobj_to_dev.cocci b/scripts/coccinelle/api/kobj_to_dev.cocci
new file mode 100644
index 000000000000..cd5d31c6fe76
--- /dev/null
+++ b/scripts/coccinelle/api/kobj_to_dev.cocci
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-only
+///
+/// Use kobj_to_dev() instead of container_of()
+///
+// Confidence: High
+// Copyright: (C) 2020 Denis Efremov ISPRAS
+// Options: --no-includes --include-headers
+//
+// Keywords: kobj_to_dev, container_of
+//
+
+virtual context
+virtual report
+virtual org
+virtual patch
+
+
+@r depends on !patch@
+expression ptr;
+symbol kobj;
+position p;
+@@
+
+* container_of(ptr, struct device, kobj)@p
+
+
+@depends on patch@
+expression ptr;
+@@
+
+- container_of(ptr, struct device, kobj)
++ kobj_to_dev(ptr)
+
+
+@script:python depends on report@
+p << r.p;
+@@
+
+coccilib.report.print_report(p[0], "WARNING opportunity for kobj_to_dev()")
+
+@script:python depends on org@
+p << r.p;
+@@
+
+coccilib.org.print_todo(p[0], "WARNING opportunity for kobj_to_dev()")
diff --git a/scripts/coccinelle/api/kvmalloc.cocci b/scripts/coccinelle/api/kvmalloc.cocci
new file mode 100644
index 000000000000..c30dab718a49
--- /dev/null
+++ b/scripts/coccinelle/api/kvmalloc.cocci
@@ -0,0 +1,256 @@
+// SPDX-License-Identifier: GPL-2.0-only
+///
+/// Find if/else condition with kmalloc/vmalloc calls.
+/// Suggest to use kvmalloc instead. Same for kvfree.
+///
+// Confidence: High
+// Copyright: (C) 2020 Denis Efremov ISPRAS
+// Options: --no-includes --include-headers
+//
+
+virtual patch
+virtual report
+virtual org
+virtual context
+
+@initialize:python@
+@@
+filter = frozenset(['kvfree'])
+
+def relevant(p):
+ return not (filter & {el.current_element for el in p})
+
+@kvmalloc depends on !patch@
+expression E, E1, size;
+identifier flags;
+binary operator cmp = {<=, <, ==, >, >=};
+identifier x;
+type T;
+position p;
+@@
+
+(
+* if (size cmp E1 || ...)@p {
+ ...
+* E = \(kmalloc\|kzalloc\|kcalloc\|kmalloc_node\|kzalloc_node\|
+* kmalloc_array\|kmalloc_array_node\|kcalloc_node\)
+* (..., size, \(flags\|GFP_KERNEL\|\(GFP_KERNEL\|flags\)|__GFP_NOWARN\), ...)
+ ...
+ } else {
+ ...
+* E = \(vmalloc\|vzalloc\|vmalloc_node\|vzalloc_node\)(..., size, ...)
+ ...
+ }
+|
+* E = \(kmalloc\|kzalloc\|kcalloc\|kmalloc_node\|kzalloc_node\|
+* kmalloc_array\|kmalloc_array_node\|kcalloc_node\)
+* (..., size, \(flags\|GFP_KERNEL\|\(GFP_KERNEL\|flags\)|__GFP_NOWARN\), ...)
+ ... when != E = E1
+ when != size = E1
+ when any
+* if (E == NULL)@p {
+ ...
+* E = \(vmalloc\|vzalloc\|vmalloc_node\|vzalloc_node\)(..., size, ...)
+ ...
+ }
+|
+* T x = \(kmalloc\|kzalloc\|kcalloc\|kmalloc_node\|kzalloc_node\|
+* kmalloc_array\|kmalloc_array_node\|kcalloc_node\)
+* (..., size, \(flags\|GFP_KERNEL\|\(GFP_KERNEL\|flags\)|__GFP_NOWARN\), ...);
+ ... when != x = E1
+ when != size = E1
+ when any
+* if (x == NULL)@p {
+ ...
+* x = \(vmalloc\|vzalloc\|vmalloc_node\|vzalloc_node\)(..., size, ...)
+ ...
+ }
+)
+
+@kvfree depends on !patch@
+expression E;
+position p : script:python() { relevant(p) };
+@@
+
+* if (is_vmalloc_addr(E))@p {
+ ...
+* vfree(E)
+ ...
+ } else {
+ ... when != krealloc(E, ...)
+ when any
+* \(kfree\|kzfree\)(E)
+ ...
+ }
+
+@depends on patch@
+expression E, E1, size, node;
+binary operator cmp = {<=, <, ==, >, >=};
+identifier flags, x;
+type T;
+@@
+
+(
+- if (size cmp E1)
+- E = kmalloc(size, flags);
+- else
+- E = vmalloc(size);
++ E = kvmalloc(size, flags);
+|
+- if (size cmp E1)
+- E = kmalloc(size, \(GFP_KERNEL\|GFP_KERNEL|__GFP_NOWARN\));
+- else
+- E = vmalloc(size);
++ E = kvmalloc(size, GFP_KERNEL);
+|
+- E = kmalloc(size, flags | __GFP_NOWARN);
+- if (E == NULL)
+- E = vmalloc(size);
++ E = kvmalloc(size, flags);
+|
+- E = kmalloc(size, \(GFP_KERNEL\|GFP_KERNEL|__GFP_NOWARN\));
+- if (E == NULL)
+- E = vmalloc(size);
++ E = kvmalloc(size, GFP_KERNEL);
+|
+- T x = kmalloc(size, flags | __GFP_NOWARN);
+- if (x == NULL)
+- x = vmalloc(size);
++ T x = kvmalloc(size, flags);
+|
+- T x = kmalloc(size, \(GFP_KERNEL\|GFP_KERNEL|__GFP_NOWARN\));
+- if (x == NULL)
+- x = vmalloc(size);
++ T x = kvmalloc(size, GFP_KERNEL);
+|
+- if (size cmp E1)
+- E = kzalloc(size, flags);
+- else
+- E = vzalloc(size);
++ E = kvzalloc(size, flags);
+|
+- if (size cmp E1)
+- E = kzalloc(size, \(GFP_KERNEL\|GFP_KERNEL|__GFP_NOWARN\));
+- else
+- E = vzalloc(size);
++ E = kvzalloc(size, GFP_KERNEL);
+|
+- E = kzalloc(size, flags | __GFP_NOWARN);
+- if (E == NULL)
+- E = vzalloc(size);
++ E = kvzalloc(size, flags);
+|
+- E = kzalloc(size, \(GFP_KERNEL\|GFP_KERNEL|__GFP_NOWARN\));
+- if (E == NULL)
+- E = vzalloc(size);
++ E = kvzalloc(size, GFP_KERNEL);
+|
+- T x = kzalloc(size, flags | __GFP_NOWARN);
+- if (x == NULL)
+- x = vzalloc(size);
++ T x = kvzalloc(size, flags);
+|
+- T x = kzalloc(size, \(GFP_KERNEL\|GFP_KERNEL|__GFP_NOWARN\));
+- if (x == NULL)
+- x = vzalloc(size);
++ T x = kvzalloc(size, GFP_KERNEL);
+|
+- if (size cmp E1)
+- E = kmalloc_node(size, flags, node);
+- else
+- E = vmalloc_node(size, node);
++ E = kvmalloc_node(size, flags, node);
+|
+- if (size cmp E1)
+- E = kmalloc_node(size, \(GFP_KERNEL\|GFP_KERNEL|__GFP_NOWARN\), node);
+- else
+- E = vmalloc_node(size, node);
++ E = kvmalloc_node(size, GFP_KERNEL, node);
+|
+- E = kmalloc_node(size, flags | __GFP_NOWARN, node);
+- if (E == NULL)
+- E = vmalloc_node(size, node);
++ E = kvmalloc_node(size, flags, node);
+|
+- E = kmalloc_node(size, \(GFP_KERNEL\|GFP_KERNEL|__GFP_NOWARN\), node);
+- if (E == NULL)
+- E = vmalloc_node(size, node);
++ E = kvmalloc_node(size, GFP_KERNEL, node);
+|
+- T x = kmalloc_node(size, flags | __GFP_NOWARN, node);
+- if (x == NULL)
+- x = vmalloc_node(size, node);
++ T x = kvmalloc_node(size, flags, node);
+|
+- T x = kmalloc_node(size, \(GFP_KERNEL\|GFP_KERNEL|__GFP_NOWARN\), node);
+- if (x == NULL)
+- x = vmalloc_node(size, node);
++ T x = kvmalloc_node(size, GFP_KERNEL, node);
+|
+- if (size cmp E1)
+- E = kvzalloc_node(size, flags, node);
+- else
+- E = vzalloc_node(size, node);
++ E = kvzalloc_node(size, flags, node);
+|
+- if (size cmp E1)
+- E = kvzalloc_node(size, \(GFP_KERNEL\|GFP_KERNEL|__GFP_NOWARN\), node);
+- else
+- E = vzalloc_node(size, node);
++ E = kvzalloc_node(size, GFP_KERNEL, node);
+|
+- E = kvzalloc_node(size, flags | __GFP_NOWARN, node);
+- if (E == NULL)
+- E = vzalloc_node(size, node);
++ E = kvzalloc_node(size, flags, node);
+|
+- E = kvzalloc_node(size, \(GFP_KERNEL\|GFP_KERNEL|__GFP_NOWARN\), node);
+- if (E == NULL)
+- E = vzalloc_node(size, node);
++ E = kvzalloc_node(size, GFP_KERNEL, node);
+|
+- T x = kvzalloc_node(size, flags | __GFP_NOWARN, node);
+- if (x == NULL)
+- x = vzalloc_node(size, node);
++ T x = kvzalloc_node(size, flags, node);
+|
+- T x = kvzalloc_node(size, \(GFP_KERNEL\|GFP_KERNEL|__GFP_NOWARN\), node);
+- if (x == NULL)
+- x = vzalloc_node(size, node);
++ T x = kvzalloc_node(size, GFP_KERNEL, node);
+)
+
+@depends on patch@
+expression E;
+position p : script:python() { relevant(p) };
+@@
+
+- if (is_vmalloc_addr(E))@p
+- vfree(E);
+- else
+- kfree(E);
++ kvfree(E);
+
+@script: python depends on report@
+p << kvmalloc.p;
+@@
+
+coccilib.report.print_report(p[0], "WARNING opportunity for kvmalloc")
+
+@script: python depends on org@
+p << kvmalloc.p;
+@@
+
+coccilib.org.print_todo(p[0], "WARNING opportunity for kvmalloc")
+
+@script: python depends on report@
+p << kvfree.p;
+@@
+
+coccilib.report.print_report(p[0], "WARNING opportunity for kvfree")
+
+@script: python depends on org@
+p << kvfree.p;
+@@
+
+coccilib.org.print_todo(p[0], "WARNING opportunity for kvfree")
diff --git a/scripts/coccinelle/free/ifnullfree.cocci b/scripts/coccinelle/free/ifnullfree.cocci
index 2045391e36a0..285b92d5c665 100644
--- a/scripts/coccinelle/free/ifnullfree.cocci
+++ b/scripts/coccinelle/free/ifnullfree.cocci
@@ -21,8 +21,14 @@ expression E;
(
kfree(E);
|
+ kvfree(E);
+|
kfree_sensitive(E);
|
+ kvfree_sensitive(E, ...);
+|
+ vfree(E);
+|
debugfs_remove(E);
|
debugfs_remove_recursive(E);
@@ -42,9 +48,10 @@ position p;
@@
* if (E != NULL)
-* \(kfree@p\|kfree_sensitive@p\|debugfs_remove@p\|debugfs_remove_recursive@p\|
+* \(kfree@p\|kvfree@p\|kfree_sensitive@p\|kvfree_sensitive@p\|vfree@p\|
+* debugfs_remove@p\|debugfs_remove_recursive@p\|
* usb_free_urb@p\|kmem_cache_destroy@p\|mempool_destroy@p\|
-* dma_pool_destroy@p\)(E);
+* dma_pool_destroy@p\)(E, ...);
@script:python depends on org@
p << r.p;
diff --git a/scripts/coccinelle/iterators/for_each_child.cocci b/scripts/coccinelle/iterators/for_each_child.cocci
new file mode 100644
index 000000000000..bc394615948e
--- /dev/null
+++ b/scripts/coccinelle/iterators/for_each_child.cocci
@@ -0,0 +1,358 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Adds missing of_node_put() before return/break/goto statement within a for_each iterator for child nodes.
+//# False positives can be due to function calls within the for_each
+//# loop that may encapsulate an of_node_put.
+///
+// Confidence: High
+// Copyright: (C) 2020 Sumera Priyadarsini
+// URL: http://coccinelle.lip6.fr
+// Options: --no-includes --include-headers
+
+virtual patch
+virtual context
+virtual org
+virtual report
+
+@r@
+local idexpression n;
+expression e1,e2;
+iterator name for_each_node_by_name, for_each_node_by_type,
+for_each_compatible_node, for_each_matching_node,
+for_each_matching_node_and_match, for_each_child_of_node,
+for_each_available_child_of_node, for_each_node_with_property;
+iterator i;
+statement S;
+expression list [n1] es;
+@@
+
+(
+(
+for_each_node_by_name(n,e1) S
+|
+for_each_node_by_type(n,e1) S
+|
+for_each_compatible_node(n,e1,e2) S
+|
+for_each_matching_node(n,e1) S
+|
+for_each_matching_node_and_match(n,e1,e2) S
+|
+for_each_child_of_node(e1,n) S
+|
+for_each_available_child_of_node(e1,n) S
+|
+for_each_node_with_property(n,e1) S
+)
+&
+i(es,n,...) S
+)
+
+@ruleone depends on patch && !context && !org && !report@
+
+local idexpression r.n;
+iterator r.i,i1;
+expression e;
+expression list [r.n1] es;
+statement S;
+@@
+
+ i(es,n,...) {
+ ...
+(
+ of_node_put(n);
+|
+ e = n
+|
+ return n;
+|
+ i1(...,n,...) S
+|
+- return of_node_get(n);
++ return n;
+|
++ of_node_put(n);
+? return ...;
+)
+ ... when any
+ }
+
+@ruletwo depends on patch && !context && !org && !report@
+
+local idexpression r.n;
+iterator r.i,i1,i2;
+expression e,e1;
+expression list [r.n1] es;
+statement S,S2;
+@@
+
+ i(es,n,...) {
+ ...
+(
+ of_node_put(n);
+|
+ e = n
+|
+ i1(...,n,...) S
+|
++ of_node_put(n);
+? break;
+)
+ ... when any
+ }
+... when != n
+ when strict
+ when forall
+(
+ n = e1;
+|
+?i2(...,n,...) S2
+)
+
+@rulethree depends on patch && !context && !org && !report exists@
+
+local idexpression r.n;
+iterator r.i,i1,i2;
+expression e,e1;
+identifier l;
+expression list [r.n1] es;
+statement S,S2;
+@@
+
+ i(es,n,...) {
+ ...
+(
+ of_node_put(n);
+|
+ e = n
+|
+ i1(...,n,...) S
+|
++ of_node_put(n);
+? goto l;
+)
+ ... when any
+ }
+... when exists
+l: ... when != n
+ when strict
+ when forall
+(
+ n = e1;
+|
+?i2(...,n,...) S2
+)
+
+// ----------------------------------------------------------------------------
+
+@ruleone_context depends on !patch && (context || org || report) exists@
+statement S;
+expression e;
+expression list[r.n1] es;
+iterator r.i, i1;
+local idexpression r.n;
+position j0, j1;
+@@
+
+ i@j0(es,n,...) {
+ ...
+(
+ of_node_put(n);
+|
+ e = n
+|
+ return n;
+|
+ i1(...,n,...) S
+|
+ return @j1 ...;
+)
+ ... when any
+ }
+
+@ruleone_disj depends on !patch && (context || org || report)@
+expression list[r.n1] es;
+iterator r.i;
+local idexpression r.n;
+position ruleone_context.j0, ruleone_context.j1;
+@@
+
+* i@j0(es,n,...) {
+ ...
+*return @j1...;
+ ... when any
+ }
+
+@ruletwo_context depends on !patch && (context || org || report) exists@
+statement S, S2;
+expression e, e1;
+expression list[r.n1] es;
+iterator r.i, i1, i2;
+local idexpression r.n;
+position j0, j2;
+@@
+
+ i@j0(es,n,...) {
+ ...
+(
+ of_node_put(n);
+|
+ e = n
+|
+ i1(...,n,...) S
+|
+ break@j2;
+)
+ ... when any
+ }
+... when != n
+ when strict
+ when forall
+(
+ n = e1;
+|
+?i2(...,n,...) S2
+)
+
+@ruletwo_disj depends on !patch && (context || org || report)@
+statement S2;
+expression e1;
+expression list[r.n1] es;
+iterator r.i, i2;
+local idexpression r.n;
+position ruletwo_context.j0, ruletwo_context.j2;
+@@
+
+* i@j0(es,n,...) {
+ ...
+*break @j2;
+ ... when any
+ }
+... when != n
+ when strict
+ when forall
+(
+ n = e1;
+|
+?i2(...,n,...) S2
+)
+
+@rulethree_context depends on !patch && (context || org || report) exists@
+identifier l;
+statement S,S2;
+expression e, e1;
+expression list[r.n1] es;
+iterator r.i, i1, i2;
+local idexpression r.n;
+position j0, j3;
+@@
+
+ i@j0(es,n,...) {
+ ...
+(
+ of_node_put(n);
+|
+ e = n
+|
+ i1(...,n,...) S
+|
+ goto l@j3;
+)
+ ... when any
+ }
+... when exists
+l:
+... when != n
+ when strict
+ when forall
+(
+ n = e1;
+|
+?i2(...,n,...) S2
+)
+
+@rulethree_disj depends on !patch && (context || org || report) exists@
+identifier l;
+statement S2;
+expression e1;
+expression list[r.n1] es;
+iterator r.i, i2;
+local idexpression r.n;
+position rulethree_context.j0, rulethree_context.j3;
+@@
+
+* i@j0(es,n,...) {
+ ...
+*goto l@j3;
+ ... when any
+ }
+... when exists
+ l:
+ ... when != n
+ when strict
+ when forall
+(
+ n = e1;
+|
+?i2(...,n,...) S2
+)
+
+// ----------------------------------------------------------------------------
+
+@script:python ruleone_org depends on org@
+i << r.i;
+j0 << ruleone_context.j0;
+j1 << ruleone_context. j1;
+@@
+
+msg = "WARNING: Function \"%s\" should have of_node_put() before return " % (i)
+coccilib.org.print_safe_todo(j0[0], msg)
+coccilib.org.print_link(j1[0], "")
+
+@script:python ruletwo_org depends on org@
+i << r.i;
+j0 << ruletwo_context.j0;
+j2 << ruletwo_context.j2;
+@@
+
+msg = "WARNING: Function \"%s\" should have of_node_put() before break " % (i)
+coccilib.org.print_safe_todo(j0[0], msg)
+coccilib.org.print_link(j2[0], "")
+
+@script:python rulethree_org depends on org@
+i << r.i;
+j0 << rulethree_context.j0;
+j3 << rulethree_context.j3;
+@@
+
+msg = "WARNING: Function \"%s\" should have of_node_put() before goto " % (i)
+coccilib.org.print_safe_todo(j0[0], msg)
+coccilib.org.print_link(j3[0], "")
+
+// ----------------------------------------------------------------------------
+
+@script:python ruleone_report depends on report@
+i << r.i;
+j0 << ruleone_context.j0;
+j1 << ruleone_context.j1;
+@@
+
+msg = "WARNING: Function \"%s\" should have of_node_put() before return around line %s." % (i, j1[0].line)
+coccilib.report.print_report(j0[0], msg)
+
+@script:python ruletwo_report depends on report@
+i << r.i;
+j0 << ruletwo_context.j0;
+j2 << ruletwo_context.j2;
+@@
+
+msg = "WARNING: Function \"%s\" should have of_node_put() before break around line %s." % (i,j2[0].line)
+coccilib.report.print_report(j0[0], msg)
+
+@script:python rulethree_report depends on report@
+i << r.i;
+j0 << rulethree_context.j0;
+j3 << rulethree_context.j3;
+@@
+
+msg = "WARNING: Function \"%s\" should have of_node_put() before goto around lines %s." % (i,j3[0].line)
+coccilib.report.print_report(j0[0], msg)
diff --git a/scripts/coccinelle/misc/excluded_middle.cocci b/scripts/coccinelle/misc/excluded_middle.cocci
new file mode 100644
index 000000000000..ab28393e4843
--- /dev/null
+++ b/scripts/coccinelle/misc/excluded_middle.cocci
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0-only
+///
+/// Condition !A || A && B is equivalent to !A || B.
+///
+// Confidence: High
+// Copyright: (C) 2020 Denis Efremov ISPRAS
+// Options: --no-includes --include-headers
+
+virtual patch
+virtual context
+virtual org
+virtual report
+
+@r depends on !patch@
+expression A, B;
+position p;
+@@
+
+* !A || (A &&@p B)
+
+@depends on patch@
+expression A, B;
+@@
+
+ !A ||
+- (A && B)
++ B
+
+@script:python depends on report@
+p << r.p;
+@@
+
+coccilib.report.print_report(p[0], "WARNING !A || A && B is equivalent to !A || B")
+
+@script:python depends on org@
+p << r.p;
+@@
+
+coccilib.org.print_todo(p[0], "WARNING !A || A && B is equivalent to !A || B")
diff --git a/scripts/coccinelle/misc/flexible_array.cocci b/scripts/coccinelle/misc/flexible_array.cocci
new file mode 100644
index 000000000000..947fbaff82a9
--- /dev/null
+++ b/scripts/coccinelle/misc/flexible_array.cocci
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0-only
+///
+/// Zero-length and one-element arrays are deprecated, see
+/// Documentation/process/deprecated.rst
+/// Flexible-array members should be used instead.
+///
+//
+// Confidence: High
+// Copyright: (C) 2020 Denis Efremov ISPRAS.
+// Comments:
+// Options: --no-includes --include-headers
+
+virtual context
+virtual report
+virtual org
+virtual patch
+
+@initialize:python@
+@@
+def relevant(positions):
+ for p in positions:
+ if "uapi" in p.file:
+ return False
+ return True
+
+@r depends on !patch@
+identifier name, array;
+type T;
+position p : script:python() { relevant(p) };
+@@
+
+(
+ struct name {
+ ...
+* T array@p[\(0\|1\)];
+ };
+|
+ struct {
+ ...
+* T array@p[\(0\|1\)];
+ };
+|
+ union name {
+ ...
+* T array@p[\(0\|1\)];
+ };
+|
+ union {
+ ...
+* T array@p[\(0\|1\)];
+ };
+)
+
+@depends on patch@
+identifier name, array;
+type T;
+position p : script:python() { relevant(p) };
+@@
+
+(
+ struct name {
+ ...
+ T array@p[
+- 0
+ ];
+ };
+|
+ struct {
+ ...
+ T array@p[
+- 0
+ ];
+ };
+)
+
+@script: python depends on report@
+p << r.p;
+@@
+
+msg = "WARNING use flexible-array member instead (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)"
+coccilib.report.print_report(p[0], msg)
+
+@script: python depends on org@
+p << r.p;
+@@
+
+msg = "WARNING use flexible-array member instead (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)"
+coccilib.org.print_todo(p[0], msg)
diff --git a/scripts/coccinelle/misc/uninitialized_var.cocci b/scripts/coccinelle/misc/uninitialized_var.cocci
new file mode 100644
index 000000000000..8fa845cefe11
--- /dev/null
+++ b/scripts/coccinelle/misc/uninitialized_var.cocci
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0-only
+///
+/// Please, don't reintroduce uninitialized_var().
+/// From Documentation/process/deprecated.rst:
+/// For any compiler warnings about uninitialized variables, just add
+/// an initializer. Using warning-silencing tricks is dangerous as it
+/// papers over real bugs (or can in the future), and suppresses unrelated
+/// compiler warnings (e.g. "unused variable"). If the compiler thinks it
+/// is uninitialized, either simply initialize the variable or make compiler
+/// changes. Keep in mind that in most cases, if an initialization is
+/// obviously redundant, the compiler's dead-store elimination pass will make
+/// sure there are no needless variable writes.
+///
+// Confidence: High
+// Copyright: (C) 2020 Denis Efremov ISPRAS
+// Options: --no-includes --include-headers
+//
+
+virtual context
+virtual report
+virtual org
+
+@r@
+identifier var;
+type T;
+position p;
+@@
+
+(
+* T var =@p var;
+|
+* T var =@p *(&(var));
+|
+* var =@p var
+|
+* var =@p *(&(var))
+)
+
+@script:python depends on report@
+p << r.p;
+@@
+
+coccilib.report.print_report(p[0],
+ "WARNING this kind of initialization is deprecated (https://www.kernel.org/doc/html/latest/process/deprecated.html#uninitialized-var)")
+
+@script:python depends on org@
+p << r.p;
+@@
+
+coccilib.org.print_todo(p[0],
+ "WARNING this kind of initialization is deprecated (https://www.kernel.org/doc/html/latest/process/deprecated.html#uninitialized-var)")
diff --git a/scripts/const_structs.checkpatch b/scripts/const_structs.checkpatch
index e9df9cc28a85..1aae4f4fdacc 100644
--- a/scripts/const_structs.checkpatch
+++ b/scripts/const_structs.checkpatch
@@ -39,6 +39,9 @@ nlmsvc_binding
nvkm_device_chip
of_device_id
pci_raw_ops
+phy_ops
+pinctrl_ops
+pinmux_ops
pipe_buf_operations
platform_hibernation_ops
platform_suspend_ops
diff --git a/scripts/gdb/linux/proc.py b/scripts/gdb/linux/proc.py
index 6a56bba233a9..09cd871925a5 100644
--- a/scripts/gdb/linux/proc.py
+++ b/scripts/gdb/linux/proc.py
@@ -167,6 +167,9 @@ values of that process namespace"""
if not namespace:
raise gdb.GdbError("No namespace for current process")
+ gdb.write("{:^18} {:^15} {:>9} {} {} options\n".format(
+ "mount", "super_block", "devname", "pathname", "fstype"))
+
for vfs in lists.list_for_each_entry(namespace['list'],
mount_ptr_type, "mnt_list"):
devname = vfs['mnt_devname'].string()
@@ -190,14 +193,10 @@ values of that process namespace"""
m_flags = int(vfs['mnt']['mnt_flags'])
rd = "ro" if (s_flags & constants.LX_SB_RDONLY) else "rw"
- gdb.write(
- "{} {} {} {}{}{} 0 0\n"
- .format(devname,
- pathname,
- fstype,
- rd,
- info_opts(FS_INFO, s_flags),
- info_opts(MNT_INFO, m_flags)))
+ gdb.write("{} {} {} {} {} {}{}{} 0 0\n".format(
+ vfs.format_string(), superblock.format_string(), devname,
+ pathname, fstype, rd, info_opts(FS_INFO, s_flags),
+ info_opts(MNT_INFO, m_flags)))
LxMounts()
diff --git a/scripts/gdb/linux/tasks.py b/scripts/gdb/linux/tasks.py
index 0301dc1e0138..17ec19e9b5bf 100644
--- a/scripts/gdb/linux/tasks.py
+++ b/scripts/gdb/linux/tasks.py
@@ -73,11 +73,12 @@ class LxPs(gdb.Command):
super(LxPs, self).__init__("lx-ps", gdb.COMMAND_DATA)
def invoke(self, arg, from_tty):
+ gdb.write("{:>10} {:>12} {:>7}\n".format("TASK", "PID", "COMM"))
for task in task_lists():
- gdb.write("{address} {pid} {comm}\n".format(
- address=task,
- pid=task["pid"],
- comm=task["comm"].string()))
+ gdb.write("{} {:^5} {}\n".format(
+ task.format_string().split()[0],
+ task["pid"].format_string(),
+ task["comm"].string()))
LxPs()
diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 484d2fbf5921..2075db0c08b8 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -541,6 +541,9 @@ foreach my $file (@ARGV) {
die "$P: file '${file}' not found\n";
}
}
+ if ($from_filename && (vcs_exists() && !vcs_file_exists($file))) {
+ warn "$P: file '$file' not found in version control $!\n";
+ }
if ($from_filename || ($file ne "&STDIN" && vcs_file_exists($file))) {
$file =~ s/^\Q${cur_path}\E//; #strip any absolute path
$file =~ s/^\Q${lk_path}\E//; #or the path to the lk tree
@@ -954,8 +957,10 @@ sub get_maintainers {
foreach my $file (@files) {
if ($email &&
- ($email_git || ($email_git_fallback &&
- !$exact_pattern_match_hash{$file}))) {
+ ($email_git ||
+ ($email_git_fallback &&
+ $file !~ /MAINTAINERS$/ &&
+ !$exact_pattern_match_hash{$file}))) {
vcs_file_signoffs($file);
}
if ($email && $email_git_blame) {
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 724528f4b7d6..c8f6b11d5da1 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -56,6 +56,13 @@ Output format selection (mutually exclusive):
-rst Output reStructuredText format.
-none Do not output documentation, only warnings.
+Output format selection modifier (affects only ReST output):
+
+ -sphinx-version Use the ReST C domain dialect compatible with an
+ specific Sphinx Version.
+ If not specified, kernel-doc will auto-detect using
+ the sphinx-build version found on PATH.
+
Output selection (mutually exclusive):
-export Only output documentation for symbols that have been
exported using EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL()
@@ -66,9 +73,8 @@ Output selection (mutually exclusive):
-function NAME Only output documentation for the given function(s)
or DOC: section title(s). All other functions and DOC:
sections are ignored. May be specified multiple times.
- -nofunction NAME Do NOT output documentation for the given function(s);
- only output documentation for the other functions and
- DOC: sections. May be specified multiple times.
+ -nosymbol NAME Exclude the specified symbols from the output
+ documentation. May be specified multiple times.
Output selection modifiers:
-no-doc-sections Do not output DOC: sections.
@@ -271,6 +277,8 @@ if ($#ARGV == -1) {
}
my $kernelversion;
+my ($sphinx_major, $sphinx_minor, $sphinx_patch);
+
my $dohighlight = "";
my $verbose = 0;
@@ -286,9 +294,8 @@ my $modulename = "Kernel API";
use constant {
OUTPUT_ALL => 0, # output all symbols and doc sections
OUTPUT_INCLUDE => 1, # output only specified symbols
- OUTPUT_EXCLUDE => 2, # output everything except specified symbols
- OUTPUT_EXPORTED => 3, # output exported symbols
- OUTPUT_INTERNAL => 4, # output non-exported symbols
+ OUTPUT_EXPORTED => 2, # output exported symbols
+ OUTPUT_INTERNAL => 3, # output non-exported symbols
};
my $output_selection = OUTPUT_ALL;
my $show_not_found = 0; # No longer used
@@ -313,6 +320,7 @@ my $man_date = ('January', 'February', 'March', 'April', 'May', 'June',
# CAVEAT EMPTOR! Some of the others I localised may not want to be, which
# could cause "use of undefined value" or other bugs.
my ($function, %function_table, %parametertypes, $declaration_purpose);
+my %nosymbol_table = ();
my $declaration_start_line;
my ($type, $declaration_name, $return_type);
my ($newsection, $newcontents, $prototype, $brcount, %source_map);
@@ -432,10 +440,9 @@ while ($ARGV[0] =~ m/^--?(.*)/) {
$output_selection = OUTPUT_INCLUDE;
$function = shift @ARGV;
$function_table{$function} = 1;
- } elsif ($cmd eq "nofunction") { # output all except specific functions
- $output_selection = OUTPUT_EXCLUDE;
- $function = shift @ARGV;
- $function_table{$function} = 1;
+ } elsif ($cmd eq "nosymbol") { # Exclude specific symbols
+ my $symbol = shift @ARGV;
+ $nosymbol_table{$symbol} = 1;
} elsif ($cmd eq "export") { # only exported symbols
$output_selection = OUTPUT_EXPORTED;
%function_table = ();
@@ -457,6 +464,23 @@ while ($ARGV[0] =~ m/^--?(.*)/) {
$enable_lineno = 1;
} elsif ($cmd eq 'show-not-found') {
$show_not_found = 1; # A no-op but don't fail
+ } elsif ($cmd eq "sphinx-version") {
+ my $ver_string = shift @ARGV;
+ if ($ver_string =~ m/^(\d+)(\.\d+)?(\.\d+)?/) {
+ $sphinx_major = $1;
+ if (defined($2)) {
+ $sphinx_minor = substr($2,1);
+ } else {
+ $sphinx_minor = 0;
+ }
+ if (defined($3)) {
+ $sphinx_patch = substr($3,1)
+ } else {
+ $sphinx_patch = 0;
+ }
+ } else {
+ die "Sphinx version should either major.minor or major.minor.patch format\n";
+ }
} else {
# Unknown argument
usage();
@@ -465,6 +489,51 @@ while ($ARGV[0] =~ m/^--?(.*)/) {
# continue execution near EOF;
+# The C domain dialect changed on Sphinx 3. So, we need to check the
+# version in order to produce the right tags.
+sub findprog($)
+{
+ foreach(split(/:/, $ENV{PATH})) {
+ return "$_/$_[0]" if(-x "$_/$_[0]");
+ }
+}
+
+sub get_sphinx_version()
+{
+ my $ver;
+
+ my $cmd = "sphinx-build";
+ if (!findprog($cmd)) {
+ my $cmd = "sphinx-build3";
+ if (!findprog($cmd)) {
+ $sphinx_major = 1;
+ $sphinx_minor = 2;
+ $sphinx_patch = 0;
+ printf STDERR "Warning: Sphinx version not found. Using default (Sphinx version %d.%d.%d)\n",
+ $sphinx_major, $sphinx_minor, $sphinx_patch;
+ return;
+ }
+ }
+
+ open IN, "$cmd --version 2>&1 |";
+ while (<IN>) {
+ if (m/^\s*sphinx-build\s+([\d]+)\.([\d\.]+)(\+\/[\da-f]+)?$/) {
+ $sphinx_major = $1;
+ $sphinx_minor = $2;
+ $sphinx_patch = $3;
+ last;
+ }
+ # Sphinx 1.2.x uses a different format
+ if (m/^\s*Sphinx.*\s+([\d]+)\.([\d\.]+)$/) {
+ $sphinx_major = $1;
+ $sphinx_minor = $2;
+ $sphinx_patch = $3;
+ last;
+ }
+ }
+ close IN;
+}
+
# get kernel version from env
sub get_kernel_version() {
my $version = 'unknown kernel version';
@@ -531,11 +600,11 @@ sub dump_doc_section {
return;
}
+ return if (defined($nosymbol_table{$name}));
+
if (($output_selection == OUTPUT_ALL) ||
- ($output_selection == OUTPUT_INCLUDE &&
- defined($function_table{$name})) ||
- ($output_selection == OUTPUT_EXCLUDE &&
- !defined($function_table{$name})))
+ (($output_selection == OUTPUT_INCLUDE) &&
+ defined($function_table{$name})))
{
dump_section($file, $name, $contents);
output_blockhead({'sectionlist' => \@sectionlist,
@@ -618,10 +687,10 @@ sub output_function_man(%) {
$type = $args{'parametertypes'}{$parameter};
if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
# pointer-to-function
- print ".BI \"" . $parenth . $1 . "\" " . $parameter . " \") (" . $2 . ")" . $post . "\"\n";
+ print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n";
} else {
$type =~ s/([^\*])$/$1 /;
- print ".BI \"" . $parenth . $type . "\" " . $parameter . " \"" . $post . "\"\n";
+ print ".BI \"" . $parenth . $type . "\" " . " \"" . $post . "\"\n";
}
$count++;
$parenth = "";
@@ -761,6 +830,8 @@ sub output_blockhead_rst(%) {
my ($parameter, $section);
foreach $section (@{$args{'sectionlist'}}) {
+ next if (defined($nosymbol_table{$section}));
+
if ($output_selection != OUTPUT_INCLUDE) {
print "**$section**\n\n";
}
@@ -846,16 +917,37 @@ sub output_function_rst(%) {
my ($parameter, $section);
my $oldprefix = $lineprefix;
my $start = "";
-
- if ($args{'typedef'}) {
- print ".. c:type:: ". $args{'function'} . "\n\n";
- print_lineno($declaration_start_line);
- print " **Typedef**: ";
- $lineprefix = "";
- output_highlight_rst($args{'purpose'});
- $start = "\n\n**Syntax**\n\n ``";
+ my $is_macro = 0;
+
+ if ($sphinx_major < 3) {
+ if ($args{'typedef'}) {
+ print ".. c:type:: ". $args{'function'} . "\n\n";
+ print_lineno($declaration_start_line);
+ print " **Typedef**: ";
+ $lineprefix = "";
+ output_highlight_rst($args{'purpose'});
+ $start = "\n\n**Syntax**\n\n ``";
+ $is_macro = 1;
+ } else {
+ print ".. c:function:: ";
+ }
} else {
- print ".. c:function:: ";
+ if ($args{'typedef'} || $args{'functiontype'} eq "") {
+ $is_macro = 1;
+ print ".. c:macro:: ". $args{'function'} . "\n\n";
+ } else {
+ print ".. c:function:: ";
+ }
+
+ if ($args{'typedef'}) {
+ print_lineno($declaration_start_line);
+ print " **Typedef**: ";
+ $lineprefix = "";
+ output_highlight_rst($args{'purpose'});
+ $start = "\n\n**Syntax**\n\n ``";
+ } else {
+ print "``" if ($is_macro);
+ }
}
if ($args{'functiontype'} ne "") {
$start .= $args{'functiontype'} . " " . $args{'function'} . " (";
@@ -876,13 +968,15 @@ sub output_function_rst(%) {
# pointer-to-function
print $1 . $parameter . ") (" . $2 . ")";
} else {
- print $type . " " . $parameter;
+ print $type;
}
}
- if ($args{'typedef'}) {
- print ");``\n\n";
+ if ($is_macro) {
+ print ")``\n\n";
} else {
print ")\n\n";
+ }
+ if (!$args{'typedef'}) {
print_lineno($declaration_start_line);
$lineprefix = " ";
output_highlight_rst($args{'purpose'});
@@ -897,7 +991,7 @@ sub output_function_rst(%) {
$type = $args{'parametertypes'}{$parameter};
if ($type ne "") {
- print "``$type $parameter``\n";
+ print "``$type``\n";
} else {
print "``$parameter``\n";
}
@@ -938,9 +1032,14 @@ sub output_enum_rst(%) {
my ($parameter);
my $oldprefix = $lineprefix;
my $count;
- my $name = "enum " . $args{'enum'};
- print "\n\n.. c:type:: " . $name . "\n\n";
+ if ($sphinx_major < 3) {
+ my $name = "enum " . $args{'enum'};
+ print "\n\n.. c:type:: " . $name . "\n\n";
+ } else {
+ my $name = $args{'enum'};
+ print "\n\n.. c:enum:: " . $name . "\n\n";
+ }
print_lineno($declaration_start_line);
$lineprefix = " ";
output_highlight_rst($args{'purpose'});
@@ -966,8 +1065,13 @@ sub output_typedef_rst(%) {
my %args = %{$_[0]};
my ($parameter);
my $oldprefix = $lineprefix;
- my $name = "typedef " . $args{'typedef'};
+ my $name;
+ if ($sphinx_major < 3) {
+ $name = "typedef " . $args{'typedef'};
+ } else {
+ $name = $args{'typedef'};
+ }
print "\n\n.. c:type:: " . $name . "\n\n";
print_lineno($declaration_start_line);
$lineprefix = " ";
@@ -982,9 +1086,14 @@ sub output_struct_rst(%) {
my %args = %{$_[0]};
my ($parameter);
my $oldprefix = $lineprefix;
- my $name = $args{'type'} . " " . $args{'struct'};
- print "\n\n.. c:type:: " . $name . "\n\n";
+ if ($sphinx_major < 3) {
+ my $name = $args{'type'} . " " . $args{'struct'};
+ print "\n\n.. c:type:: " . $name . "\n\n";
+ } else {
+ my $name = $args{'struct'};
+ print "\n\n.. c:struct:: " . $name . "\n\n";
+ }
print_lineno($declaration_start_line);
$lineprefix = " ";
output_highlight_rst($args{'purpose'});
@@ -1043,12 +1152,14 @@ sub output_declaration {
my $name = shift;
my $functype = shift;
my $func = "output_${functype}_$output_mode";
+
+ return if (defined($nosymbol_table{$name}));
+
if (($output_selection == OUTPUT_ALL) ||
(($output_selection == OUTPUT_INCLUDE ||
$output_selection == OUTPUT_EXPORTED) &&
defined($function_table{$name})) ||
- (($output_selection == OUTPUT_EXCLUDE ||
- $output_selection == OUTPUT_INTERNAL) &&
+ ($output_selection == OUTPUT_INTERNAL &&
!($functype eq "function" && defined($function_table{$name}))))
{
&$func(@_);
@@ -1229,6 +1340,8 @@ sub show_warnings($$) {
my $functype = shift;
my $name = shift;
+ return 0 if (defined($nosymbol_table{$name}));
+
return 1 if ($output_selection == OUTPUT_ALL);
if ($output_selection == OUTPUT_EXPORTED) {
@@ -1252,27 +1365,28 @@ sub show_warnings($$) {
return 0;
}
}
- if ($output_selection == OUTPUT_EXCLUDE) {
- if (!defined($function_table{$name})) {
- return 1;
- } else {
- return 0;
- }
- }
die("Please add the new output type at show_warnings()");
}
sub dump_enum($$) {
my $x = shift;
my $file = shift;
+ my $members;
+
$x =~ s@/\*.*?\*/@@gos; # strip comments.
# strip #define macros inside enums
$x =~ s@#\s*((define|ifdef)\s+|endif)[^;]*;@@gos;
- if ($x =~ /enum\s+(\w*)\s*\{(.*)\}/) {
+ if ($x =~ /typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;/) {
+ $declaration_name = $2;
+ $members = $1;
+ } elsif ($x =~ /enum\s+(\w*)\s*\{(.*)\}/) {
$declaration_name = $1;
- my $members = $2;
+ $members = $2;
+ }
+
+ if ($declaration_name) {
my %_members;
$members =~ s/\s+$//;
@@ -1307,8 +1421,7 @@ sub dump_enum($$) {
'sections' => \%sections,
'purpose' => $declaration_purpose
});
- }
- else {
+ } else {
print STDERR "${file}:$.: error: Cannot parse enum!\n";
++$errors;
}
@@ -1403,7 +1516,7 @@ sub create_parameterlist($$$$) {
# Treat preprocessor directive as a typeless variable just to fill
# corresponding data structures "correctly". Catch it later in
# output_* subs.
- push_parameter($arg, "", $file);
+ push_parameter($arg, "", "", $file);
} elsif ($arg =~ m/\(.+\)\s*\(/) {
# pointer-to-function
$arg =~ tr/#/,/;
@@ -1412,7 +1525,7 @@ sub create_parameterlist($$$$) {
$type = $arg;
$type =~ s/([^\(]+\(\*?)\s*$param/$1/;
save_struct_actual($param);
- push_parameter($param, $type, $file, $declaration_name);
+ push_parameter($param, $type, $arg, $file, $declaration_name);
} elsif ($arg) {
$arg =~ s/\s*:\s*/:/g;
$arg =~ s/\s*\[/\[/g;
@@ -1437,26 +1550,28 @@ sub create_parameterlist($$$$) {
foreach $param (@args) {
if ($param =~ m/^(\*+)\s*(.*)/) {
save_struct_actual($2);
- push_parameter($2, "$type $1", $file, $declaration_name);
+
+ push_parameter($2, "$type $1", $arg, $file, $declaration_name);
}
elsif ($param =~ m/(.*?):(\d+)/) {
if ($type ne "") { # skip unnamed bit-fields
save_struct_actual($1);
- push_parameter($1, "$type:$2", $file, $declaration_name)
+ push_parameter($1, "$type:$2", $arg, $file, $declaration_name)
}
}
else {
save_struct_actual($param);
- push_parameter($param, $type, $file, $declaration_name);
+ push_parameter($param, $type, $arg, $file, $declaration_name);
}
}
}
}
}
-sub push_parameter($$$$) {
+sub push_parameter($$$$$) {
my $param = shift;
my $type = shift;
+ my $org_arg = shift;
my $file = shift;
my $declaration_name = shift;
@@ -1520,8 +1635,8 @@ sub push_parameter($$$$) {
# "[blah" in a parameter string;
###$param =~ s/\s*//g;
push @parameterlist, $param;
- $type =~ s/\s\s+/ /g;
- $parametertypes{$param} = $type;
+ $org_arg =~ s/\s\s+/ /g;
+ $parametertypes{$param} = $org_arg;
}
sub check_sections($$$$$) {
@@ -1595,7 +1710,7 @@ sub dump_function($$) {
my $file = shift;
my $noret = 0;
- print_lineno($.);
+ print_lineno($new_start_line);
$prototype =~ s/^static +//;
$prototype =~ s/^extern +//;
@@ -1672,30 +1787,48 @@ sub dump_function($$) {
return;
}
- my $prms = join " ", @parameterlist;
- check_sections($file, $declaration_name, "function", $sectcheck, $prms);
+ my $prms = join " ", @parameterlist;
+ check_sections($file, $declaration_name, "function", $sectcheck, $prms);
- # This check emits a lot of warnings at the moment, because many
- # functions don't have a 'Return' doc section. So until the number
- # of warnings goes sufficiently down, the check is only performed in
- # verbose mode.
- # TODO: always perform the check.
- if ($verbose && !$noret) {
- check_return_section($file, $declaration_name, $return_type);
- }
+ # This check emits a lot of warnings at the moment, because many
+ # functions don't have a 'Return' doc section. So until the number
+ # of warnings goes sufficiently down, the check is only performed in
+ # verbose mode.
+ # TODO: always perform the check.
+ if ($verbose && !$noret) {
+ check_return_section($file, $declaration_name, $return_type);
+ }
- output_declaration($declaration_name,
- 'function',
- {'function' => $declaration_name,
- 'module' => $modulename,
- 'functiontype' => $return_type,
- 'parameterlist' => \@parameterlist,
- 'parameterdescs' => \%parameterdescs,
- 'parametertypes' => \%parametertypes,
- 'sectionlist' => \@sectionlist,
- 'sections' => \%sections,
- 'purpose' => $declaration_purpose
- });
+ # The function parser can be called with a typedef parameter.
+ # Handle it.
+ if ($return_type =~ /typedef/) {
+ output_declaration($declaration_name,
+ 'function',
+ {'function' => $declaration_name,
+ 'typedef' => 1,
+ 'module' => $modulename,
+ 'functiontype' => $return_type,
+ 'parameterlist' => \@parameterlist,
+ 'parameterdescs' => \%parameterdescs,
+ 'parametertypes' => \%parametertypes,
+ 'sectionlist' => \@sectionlist,
+ 'sections' => \%sections,
+ 'purpose' => $declaration_purpose
+ });
+ } else {
+ output_declaration($declaration_name,
+ 'function',
+ {'function' => $declaration_name,
+ 'module' => $modulename,
+ 'functiontype' => $return_type,
+ 'parameterlist' => \@parameterlist,
+ 'parameterdescs' => \%parameterdescs,
+ 'parametertypes' => \%parametertypes,
+ 'sectionlist' => \@sectionlist,
+ 'sections' => \%sections,
+ 'purpose' => $declaration_purpose
+ });
+ }
}
sub reset_state {
@@ -1873,6 +2006,7 @@ sub process_export_file($) {
while (<IN>) {
if (/$export_symbol/) {
+ next if (defined($nosymbol_table{$2}));
$function_table{$2} = 1;
}
}
@@ -1904,7 +2038,7 @@ sub process_name($$) {
if (/$doc_block/o) {
$state = STATE_DOCBLOCK;
$contents = "";
- $new_start_line = $. + 1;
+ $new_start_line = $.;
if ( $1 eq "" ) {
$section = $section_intro;
@@ -1987,6 +2121,7 @@ sub process_body($$) {
if ($state == STATE_BODY_WITH_BLANK_LINE && /^\s*\*\s?\S/) {
dump_section($file, $section, $contents);
$section = $section_default;
+ $new_start_line = $.;
$contents = "";
}
@@ -2042,6 +2177,7 @@ sub process_body($$) {
$prototype = "";
$state = STATE_PROTO;
$brcount = 0;
+ $new_start_line = $. + 1;
} elsif (/$doc_content/) {
if ($1 eq "") {
if ($section eq $section_context) {
@@ -2189,7 +2325,7 @@ sub process_file($) {
$file = map_filename($orig_file);
- if (!open(IN,"<$file")) {
+ if (!open(IN_FILE,"<$file")) {
print STDERR "Error: Cannot open file $file\n";
++$errors;
return;
@@ -2198,9 +2334,9 @@ sub process_file($) {
$. = 1;
$section_counter = 0;
- while (<IN>) {
+ while (<IN_FILE>) {
while (s/\\\s*$//) {
- $_ .= <IN>;
+ $_ .= <IN_FILE>;
}
# Replace tabs by spaces
while ($_ =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e) {};
@@ -2232,9 +2368,14 @@ sub process_file($) {
print STDERR "${file}:1: warning: no structured comments found\n";
}
}
+ close IN_FILE;
}
+if ($output_mode eq "rst") {
+ get_sphinx_version() if (!$sphinx_major);
+}
+
$kernelversion = get_kernel_version();
# generate a sequence of code that will splice in highlighting information
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 38415d251075..97cbfb31b762 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -38,9 +38,8 @@ FEATURE_TESTS_BASIC := \
get_current_dir_name \
gettid \
glibc \
- gtk2 \
- gtk2-infobar \
libbfd \
+ libbfd-buildid \
libcap \
libelf \
libelf-getphdrnum \
@@ -80,6 +79,8 @@ FEATURE_TESTS_EXTRA := \
compile-32 \
compile-x32 \
cplus-demangle \
+ gtk2 \
+ gtk2-infobar \
hello \
libbabeltrace \
libbfd-liberty \
@@ -110,8 +111,8 @@ FEATURE_DISPLAY ?= \
dwarf \
dwarf_getlocations \
glibc \
- gtk2 \
libbfd \
+ libbfd-buildid \
libcap \
libelf \
libnuma \
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index b2a2347c67ed..cdde783f3018 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -15,6 +15,7 @@ FILES= \
test-hello.bin \
test-libaudit.bin \
test-libbfd.bin \
+ test-libbfd-buildid.bin \
test-disassembler-four-args.bin \
test-reallocarray.bin \
test-libbfd-liberty.bin \
@@ -89,7 +90,7 @@ __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(
###############################
$(OUTPUT)test-all.bin:
- $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma
+ $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -I/usr/include/slang -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd
$(OUTPUT)test-hello.bin:
$(BUILD)
@@ -225,6 +226,9 @@ $(OUTPUT)test-libpython-version.bin:
$(OUTPUT)test-libbfd.bin:
$(BUILD) -DPACKAGE='"perf"' -lbfd -ldl
+$(OUTPUT)test-libbfd-buildid.bin:
+ $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl
+
$(OUTPUT)test-disassembler-four-args.bin:
$(BUILD) -DPACKAGE='"perf"' -lbfd -lopcodes
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index 5284e6e9c756..a04e81321c66 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -74,18 +74,14 @@
# include "test-libslang.c"
#undef main
-#define main main_test_gtk2
-# include "test-gtk2.c"
-#undef main
-
-#define main main_test_gtk2_infobar
-# include "test-gtk2-infobar.c"
-#undef main
-
#define main main_test_libbfd
# include "test-libbfd.c"
#undef main
+#define main main_test_libbfd_buildid
+# include "test-libbfd-buildid.c"
+#undef main
+
#define main main_test_backtrace
# include "test-backtrace.c"
#undef main
@@ -201,9 +197,8 @@ int main(int argc, char *argv[])
main_test_libelf_getshdrstrndx();
main_test_libunwind();
main_test_libslang();
- main_test_gtk2(argc, argv);
- main_test_gtk2_infobar(argc, argv);
main_test_libbfd();
+ main_test_libbfd_buildid();
main_test_backtrace();
main_test_libnuma();
main_test_numa_num_possible_cpus();
diff --git a/tools/build/feature/test-libbfd-buildid.c b/tools/build/feature/test-libbfd-buildid.c
new file mode 100644
index 000000000000..157644b04c05
--- /dev/null
+++ b/tools/build/feature/test-libbfd-buildid.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bfd.h>
+
+int main(void)
+{
+ bfd *abfd = bfd_openr("Pedro", 0);
+ return abfd && (!abfd->build_id || abfd->build_id->size > 0x506564726f);
+}
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index 2208444ecb44..cfcdbd7be066 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -45,6 +45,9 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
if (!evsel->own_cpus || evlist->has_user_cpus) {
perf_cpu_map__put(evsel->cpus);
evsel->cpus = perf_cpu_map__get(evlist->cpus);
+ } else if (!evsel->system_wide && perf_cpu_map__empty(evlist->cpus)) {
+ perf_cpu_map__put(evsel->cpus);
+ evsel->cpus = perf_cpu_map__get(evlist->cpus);
} else if (evsel->cpus != evsel->own_cpus) {
perf_cpu_map__put(evsel->cpus);
evsel->cpus = perf_cpu_map__get(evsel->own_cpus);
diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
index 842028858d66..988c539bedb6 100644
--- a/tools/lib/perf/include/perf/event.h
+++ b/tools/lib/perf/include/perf/event.h
@@ -201,10 +201,20 @@ struct perf_record_header_tracing_data {
__u32 size;
};
+#define PERF_RECORD_MISC_BUILD_ID_SIZE (1 << 15)
+
struct perf_record_header_build_id {
struct perf_event_header header;
pid_t pid;
- __u8 build_id[24];
+ union {
+ __u8 build_id[24];
+ struct {
+ __u8 data[20];
+ __u8 size;
+ __u8 reserved1__;
+ __u16 reserved2__;
+ };
+ };
char filename[];
};
@@ -324,6 +334,10 @@ struct perf_record_time_conv {
__u64 time_shift;
__u64 time_mult;
__u64 time_zero;
+ __u64 time_cycles;
+ __u64 time_mask;
+ bool cap_user_time_zero;
+ bool cap_user_time_short;
};
struct perf_record_header_feature {
diff --git a/tools/lib/traceevent/event-parse-api.c b/tools/lib/traceevent/event-parse-api.c
index 4faf52a65791..f8361e45d446 100644
--- a/tools/lib/traceevent/event-parse-api.c
+++ b/tools/lib/traceevent/event-parse-api.c
@@ -92,7 +92,7 @@ bool tep_test_flag(struct tep_handle *tep, enum tep_flag flag)
return false;
}
-unsigned short tep_data2host2(struct tep_handle *tep, unsigned short data)
+__hidden unsigned short data2host2(struct tep_handle *tep, unsigned short data)
{
unsigned short swap;
@@ -105,7 +105,7 @@ unsigned short tep_data2host2(struct tep_handle *tep, unsigned short data)
return swap;
}
-unsigned int tep_data2host4(struct tep_handle *tep, unsigned int data)
+__hidden unsigned int data2host4(struct tep_handle *tep, unsigned int data)
{
unsigned int swap;
@@ -120,8 +120,8 @@ unsigned int tep_data2host4(struct tep_handle *tep, unsigned int data)
return swap;
}
-unsigned long long
-tep_data2host8(struct tep_handle *tep, unsigned long long data)
+__hidden unsigned long long
+data2host8(struct tep_handle *tep, unsigned long long data)
{
unsigned long long swap;
diff --git a/tools/lib/traceevent/event-parse-local.h b/tools/lib/traceevent/event-parse-local.h
index d805a920af6f..fd4bbcfbb849 100644
--- a/tools/lib/traceevent/event-parse-local.h
+++ b/tools/lib/traceevent/event-parse-local.h
@@ -15,6 +15,8 @@ struct event_handler;
struct func_resolver;
struct tep_plugins_dir;
+#define __hidden __attribute__((visibility ("hidden")))
+
struct tep_handle {
int ref_count;
@@ -102,12 +104,20 @@ struct tep_print_parse {
struct tep_print_arg *len_as_arg;
};
-void tep_free_event(struct tep_event *event);
-void tep_free_format_field(struct tep_format_field *field);
-void tep_free_plugin_paths(struct tep_handle *tep);
-
-unsigned short tep_data2host2(struct tep_handle *tep, unsigned short data);
-unsigned int tep_data2host4(struct tep_handle *tep, unsigned int data);
-unsigned long long tep_data2host8(struct tep_handle *tep, unsigned long long data);
+void free_tep_event(struct tep_event *event);
+void free_tep_format_field(struct tep_format_field *field);
+void free_tep_plugin_paths(struct tep_handle *tep);
+
+unsigned short data2host2(struct tep_handle *tep, unsigned short data);
+unsigned int data2host4(struct tep_handle *tep, unsigned int data);
+unsigned long long data2host8(struct tep_handle *tep, unsigned long long data);
+
+/* access to the internal parser */
+int peek_char(void);
+void init_input_buf(const char *buf, unsigned long long size);
+unsigned long long get_input_buf_ptr(void);
+const char *get_input_buf(void);
+enum tep_event_type read_token(char **tok);
+void free_token(char *tok);
#endif /* _PARSE_EVENTS_INT_H */
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 5acc18b32606..fe58843d047c 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -54,19 +54,26 @@ static int show_warning = 1;
warning(fmt, ##__VA_ARGS__); \
} while (0)
-static void init_input_buf(const char *buf, unsigned long long size)
+/**
+ * init_input_buf - init buffer for parsing
+ * @buf: buffer to parse
+ * @size: the size of the buffer
+ *
+ * Initializes the internal buffer that tep_read_token() will parse.
+ */
+__hidden void init_input_buf(const char *buf, unsigned long long size)
{
input_buf = buf;
input_buf_siz = size;
input_buf_ptr = 0;
}
-const char *tep_get_input_buf(void)
+__hidden const char *get_input_buf(void)
{
return input_buf;
}
-unsigned long long tep_get_input_buf_ptr(void)
+__hidden unsigned long long get_input_buf_ptr(void)
{
return input_buf_ptr;
}
@@ -100,26 +107,13 @@ process_defined_func(struct trace_seq *s, void *data, int size,
static void free_func_handle(struct tep_function_handler *func);
-/**
- * tep_buffer_init - init buffer for parsing
- * @buf: buffer to parse
- * @size: the size of the buffer
- *
- * For use with tep_read_token(), this initializes the internal
- * buffer that tep_read_token() will parse.
- */
-void tep_buffer_init(const char *buf, unsigned long long size)
-{
- init_input_buf(buf, size);
-}
-
void breakpoint(void)
{
static int x;
x++;
}
-struct tep_print_arg *alloc_arg(void)
+static struct tep_print_arg *alloc_arg(void)
{
return calloc(1, sizeof(struct tep_print_arg));
}
@@ -962,22 +956,17 @@ static int __read_char(void)
return input_buf[input_buf_ptr++];
}
-static int __peek_char(void)
-{
- if (input_buf_ptr >= input_buf_siz)
- return -1;
-
- return input_buf[input_buf_ptr];
-}
-
/**
- * tep_peek_char - peek at the next character that will be read
+ * peek_char - peek at the next character that will be read
*
* Returns the next character read, or -1 if end of buffer.
*/
-int tep_peek_char(void)
+__hidden int peek_char(void)
{
- return __peek_char();
+ if (input_buf_ptr >= input_buf_siz)
+ return -1;
+
+ return input_buf[input_buf_ptr];
}
static int extend_token(char **tok, char *buf, int size)
@@ -1033,7 +1022,7 @@ static enum tep_event_type __read_token(char **tok)
case TEP_EVENT_OP:
switch (ch) {
case '-':
- next_ch = __peek_char();
+ next_ch = peek_char();
if (next_ch == '>') {
buf[i++] = __read_char();
break;
@@ -1045,7 +1034,7 @@ static enum tep_event_type __read_token(char **tok)
case '>':
case '<':
last_ch = ch;
- ch = __peek_char();
+ ch = peek_char();
if (ch != last_ch)
goto test_equal;
buf[i++] = __read_char();
@@ -1068,7 +1057,7 @@ static enum tep_event_type __read_token(char **tok)
return type;
test_equal:
- ch = __peek_char();
+ ch = peek_char();
if (ch == '=')
buf[i++] = __read_char();
goto out;
@@ -1122,7 +1111,7 @@ static enum tep_event_type __read_token(char **tok)
break;
}
- while (get_type(__peek_char()) == type) {
+ while (get_type(peek_char()) == type) {
if (i == (BUFSIZ - 1)) {
buf[i] = 0;
tok_size += BUFSIZ;
@@ -1191,13 +1180,26 @@ static enum tep_event_type force_token(const char *str, char **tok)
return type;
}
-static void free_token(char *tok)
+/**
+ * free_token - free a token returned by tep_read_token
+ * @token: the token to free
+ */
+__hidden void free_token(char *tok)
{
if (tok)
free(tok);
}
-static enum tep_event_type read_token(char **tok)
+/**
+ * read_token - access to utilities to use the tep parser
+ * @tok: The token to return
+ *
+ * This will parse tokens from the string given by
+ * tep_init_data().
+ *
+ * Returns the token type.
+ */
+__hidden enum tep_event_type read_token(char **tok)
{
enum tep_event_type type;
@@ -1214,29 +1216,6 @@ static enum tep_event_type read_token(char **tok)
return TEP_EVENT_NONE;
}
-/**
- * tep_read_token - access to utilities to use the tep parser
- * @tok: The token to return
- *
- * This will parse tokens from the string given by
- * tep_init_data().
- *
- * Returns the token type.
- */
-enum tep_event_type tep_read_token(char **tok)
-{
- return read_token(tok);
-}
-
-/**
- * tep_free_token - free a token returned by tep_read_token
- * @token: the token to free
- */
-void tep_free_token(char *token)
-{
- free_token(token);
-}
-
/* no newline */
static enum tep_event_type read_token_item(char **tok)
{
@@ -3459,12 +3438,12 @@ unsigned long long tep_read_number(struct tep_handle *tep,
case 1:
return *(unsigned char *)ptr;
case 2:
- return tep_data2host2(tep, *(unsigned short *)ptr);
+ return data2host2(tep, *(unsigned short *)ptr);
case 4:
- return tep_data2host4(tep, *(unsigned int *)ptr);
+ return data2host4(tep, *(unsigned int *)ptr);
case 8:
memcpy(&val, (ptr), sizeof(unsigned long long));
- return tep_data2host8(tep, val);
+ return data2host8(tep, val);
default:
/* BUG! */
return 0;
@@ -4190,7 +4169,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
f = tep_find_any_field(event, arg->string.string);
arg->string.offset = f->offset;
}
- str_offset = tep_data2host4(tep, *(unsigned int *)(data + arg->string.offset));
+ str_offset = data2host4(tep, *(unsigned int *)(data + arg->string.offset));
str_offset &= 0xffff;
print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset);
break;
@@ -4208,7 +4187,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
f = tep_find_any_field(event, arg->bitmask.bitmask);
arg->bitmask.offset = f->offset;
}
- bitmask_offset = tep_data2host4(tep, *(unsigned int *)(data + arg->bitmask.offset));
+ bitmask_offset = data2host4(tep, *(unsigned int *)(data + arg->bitmask.offset));
bitmask_size = bitmask_offset >> 16;
bitmask_offset &= 0xffff;
print_bitmask_to_seq(tep, s, format, len_arg,
@@ -6750,7 +6729,7 @@ static int find_event_handle(struct tep_handle *tep, struct tep_event *event)
}
/**
- * __tep_parse_format - parse the event format
+ * parse_format - parse the event format
* @buf: the buffer storing the event format string
* @size: the size of @buf
* @sys: the system the event belongs to
@@ -6762,9 +6741,9 @@ static int find_event_handle(struct tep_handle *tep, struct tep_event *event)
*
* /sys/kernel/debug/tracing/events/.../.../format
*/
-enum tep_errno __tep_parse_format(struct tep_event **eventp,
- struct tep_handle *tep, const char *buf,
- unsigned long size, const char *sys)
+static enum tep_errno parse_format(struct tep_event **eventp,
+ struct tep_handle *tep, const char *buf,
+ unsigned long size, const char *sys)
{
struct tep_event *event;
int ret;
@@ -6879,7 +6858,7 @@ __parse_event(struct tep_handle *tep,
const char *buf, unsigned long size,
const char *sys)
{
- int ret = __tep_parse_format(eventp, tep, buf, size, sys);
+ int ret = parse_format(eventp, tep, buf, size, sys);
struct tep_event *event = *eventp;
if (event == NULL)
@@ -6897,7 +6876,7 @@ __parse_event(struct tep_handle *tep,
return 0;
event_add_failed:
- tep_free_event(event);
+ free_tep_event(event);
return ret;
}
@@ -7490,7 +7469,7 @@ int tep_get_ref(struct tep_handle *tep)
return 0;
}
-void tep_free_format_field(struct tep_format_field *field)
+__hidden void free_tep_format_field(struct tep_format_field *field)
{
free(field->type);
if (field->alias != field->name)
@@ -7505,7 +7484,7 @@ static void free_format_fields(struct tep_format_field *field)
while (field) {
next = field->next;
- tep_free_format_field(field);
+ free_tep_format_field(field);
field = next;
}
}
@@ -7516,7 +7495,7 @@ static void free_formats(struct tep_format *format)
free_format_fields(format->fields);
}
-void tep_free_event(struct tep_event *event)
+__hidden void free_tep_event(struct tep_event *event)
{
free(event->name);
free(event->system);
@@ -7602,7 +7581,7 @@ void tep_free(struct tep_handle *tep)
}
for (i = 0; i < tep->nr_events; i++)
- tep_free_event(tep->events[i]);
+ free_tep_event(tep->events[i]);
while (tep->handlers) {
handle = tep->handlers;
@@ -7613,7 +7592,7 @@ void tep_free(struct tep_handle *tep)
free(tep->events);
free(tep->sort_events);
free(tep->func_resolver);
- tep_free_plugin_paths(tep);
+ free_tep_plugin_paths(tep);
free(tep);
}
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index c29b693e31ee..a67ad9a5b835 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -578,14 +578,6 @@ void tep_ref(struct tep_handle *tep);
void tep_unref(struct tep_handle *tep);
int tep_get_ref(struct tep_handle *tep);
-/* access to the internal parser */
-void tep_buffer_init(const char *buf, unsigned long long size);
-enum tep_event_type tep_read_token(char **tok);
-void tep_free_token(char *token);
-int tep_peek_char(void);
-const char *tep_get_input_buf(void);
-unsigned long long tep_get_input_buf_ptr(void);
-
/* for debugging */
void tep_print_funcs(struct tep_handle *tep);
void tep_print_printk(struct tep_handle *tep);
diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c
index e7c2acb8680f..e7f93d5fe4fd 100644
--- a/tools/lib/traceevent/event-plugin.c
+++ b/tools/lib/traceevent/event-plugin.c
@@ -676,7 +676,7 @@ int tep_add_plugin_path(struct tep_handle *tep, char *path,
return 0;
}
-void tep_free_plugin_paths(struct tep_handle *tep)
+__hidden void free_tep_plugin_paths(struct tep_handle *tep)
{
struct tep_plugins_dir *dir;
diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c
index c271aeeb227d..368826bb5a57 100644
--- a/tools/lib/traceevent/parse-filter.c
+++ b/tools/lib/traceevent/parse-filter.c
@@ -38,8 +38,8 @@ static void show_error(char *error_buf, const char *fmt, ...)
int len;
int i;
- input = tep_get_input_buf();
- index = tep_get_input_buf_ptr();
+ input = get_input_buf();
+ index = get_input_buf_ptr();
len = input ? strlen(input) : 0;
if (len) {
@@ -57,25 +57,20 @@ static void show_error(char *error_buf, const char *fmt, ...)
va_end(ap);
}
-static void free_token(char *token)
-{
- tep_free_token(token);
-}
-
-static enum tep_event_type read_token(char **tok)
+static enum tep_event_type filter_read_token(char **tok)
{
enum tep_event_type type;
char *token = NULL;
do {
free_token(token);
- type = tep_read_token(&token);
+ type = read_token(&token);
} while (type == TEP_EVENT_NEWLINE || type == TEP_EVENT_SPACE);
/* If token is = or ! check to see if the next char is ~ */
if (token &&
(strcmp(token, "=") == 0 || strcmp(token, "!") == 0) &&
- tep_peek_char() == '~') {
+ peek_char() == '~') {
/* append it */
*tok = malloc(3);
if (*tok == NULL) {
@@ -85,7 +80,7 @@ static enum tep_event_type read_token(char **tok)
sprintf(*tok, "%c%c", *token, '~');
free_token(token);
/* Now remove the '~' from the buffer */
- tep_read_token(&token);
+ read_token(&token);
free_token(token);
} else
*tok = token;
@@ -959,7 +954,7 @@ process_filter(struct tep_event *event, struct tep_filter_arg **parg,
do {
free(token);
- type = read_token(&token);
+ type = filter_read_token(&token);
switch (type) {
case TEP_EVENT_SQUOTE:
case TEP_EVENT_DQUOTE:
@@ -1185,7 +1180,7 @@ process_event(struct tep_event *event, const char *filter_str,
{
int ret;
- tep_buffer_init(filter_str, strlen(filter_str));
+ init_input_buf(filter_str, strlen(filter_str));
ret = process_filter(event, parg, error_str, 0);
if (ret < 0)
@@ -1243,7 +1238,7 @@ filter_event(struct tep_event_filter *filter, struct tep_event *event,
static void filter_init_error_buf(struct tep_event_filter *filter)
{
/* clear buffer to reset show error */
- tep_buffer_init("", 0);
+ init_input_buf("", 0);
filter->error_buffer[0] = '\0';
}
diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt
index 98efdab5fbd4..c81d72e3eecf 100644
--- a/tools/perf/Documentation/perf-c2c.txt
+++ b/tools/perf/Documentation/perf-c2c.txt
@@ -174,34 +174,36 @@ For each cacheline in the 1) list we display following data:
Cacheline
- cacheline address (hex number)
- Total records
- - sum of all cachelines accesses
-
Rmt/Lcl Hitm
- cacheline percentage of all Remote/Local HITM accesses
- LLC Load Hitm - Total, Lcl, Rmt
+ LLC Load Hitm - Total, LclHitm, RmtHitm
- count of Total/Local/Remote load HITMs
- Store Reference - Total, L1Hit, L1Miss
- Total - all store accesses
- L1Hit - store accesses that hit L1
- L1Hit - store accesses that missed L1
+ Total records
+ - sum of all cachelines accesses
- Load Dram
- - count of local and remote DRAM accesses
+ Total loads
+ - sum of all load accesses
- LLC Ld Miss
- - count of all accesses that missed LLC
+ Total stores
+ - sum of all store accesses
- Total Loads
- - sum of all load accesses
+ Store Reference - L1Hit, L1Miss
+ L1Hit - store accesses that hit L1
+ L1Miss - store accesses that missed L1
Core Load Hit - FB, L1, L2
- count of load hits in FB (Fill Buffer), L1 and L2 cache
- LLC Load Hit - Llc, Rmt
- - count of LLC and Remote load hits
+ LLC Load Hit - LlcHit, LclHitm
+ - count of LLC load accesses, includes LLC hits and LLC HITMs
+
+ RMT Load Hit - RmtHit, RmtHitm
+ - count of remote load accesses, includes remote hits and remote HITMs
+
+ Load Dram - Lcl, Rmt
+ - count of local and remote DRAM accesses
For each offset in the 2) list we display following data:
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index 76408d986aed..31069d8a5304 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -242,6 +242,11 @@ annotate.*::
These are in control of addresses, jump function, source code
in lines of assembly code from a specific program.
+ annotate.disassembler_style:
+ Use this to change the default disassembler style to some other value
+ supported by binutils, such as "intel", see the '-M' option help in the
+ 'objdump' man page.
+
annotate.hide_src_code::
If a program which is analyzed has source code,
this option lets 'annotate' print a list of assembly code with the source code.
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index f50ca0fef0a4..be65bd55ab2a 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -182,6 +182,10 @@ OPTIONS
--tid=::
Only diff samples for given thread ID (comma separated list).
+--stream::
+ Enable hot streams comparison. Stream can be a callchain which is
+ aggregated by the branch records from samples.
+
COMPARISON
----------
The comparison is governed by the baseline file. The baseline perf.data
diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index 78358af9a1c4..1e91121bac0f 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -33,7 +33,8 @@ OPTIONS
-F::
--funcs::
- List all available functions to trace.
+ List available functions to trace. It accepts a pattern to
+ only list interested functions.
-p::
--pid=::
diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
index 70969ea73e01..a8eccff21281 100644
--- a/tools/perf/Documentation/perf-inject.txt
+++ b/tools/perf/Documentation/perf-inject.txt
@@ -24,8 +24,12 @@ information could make use of this facility.
OPTIONS
-------
-b::
---build-ids=::
+--build-ids::
Inject build-ids into the output stream
+
+--buildid-all:
+ Inject build-ids of all DSOs into the output stream
+
-v::
--verbose::
Be more verbose.
diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt
index d5a266d7f15b..cd362dc2af07 100644
--- a/tools/perf/Documentation/perf-intel-pt.txt
+++ b/tools/perf/Documentation/perf-intel-pt.txt
@@ -112,6 +112,32 @@ The flags are "bcrosyiABEx" which stand for branch, call, return, conditional,
system, asynchronous, interrupt, transaction abort, trace begin, trace end, and
in transaction, respectively.
+perf script also supports higher level ways to dump instruction traces:
+
+ perf script --insn-trace --xed
+
+Dump all instructions. This requires installing the xed tool (see XED below)
+Dumping all instructions in a long trace can be fairly slow. It is usually better
+to start with higher level decoding, like
+
+ perf script --call-trace
+
+or
+
+ perf script --call-ret-trace
+
+and then select a time range of interest. The time range can then be examined
+in detail with
+
+ perf script --time starttime,stoptime --insn-trace --xed
+
+While examining the trace it's also useful to filter on specific CPUs using
+the -C option
+
+ perf script --time starttime,stoptime --insn-trace --xed -C 1
+
+Dump all instructions in time range on CPU 1.
+
Another interesting field that is not printed by default is 'ipc' which can be
displayed as follows:
@@ -558,7 +584,7 @@ The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g
Intel PT modes of operation
~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Intel PT can be used in 2 modes:
+Intel PT can be used in 3 modes:
full-trace mode
sample mode
snapshot mode
@@ -571,7 +597,8 @@ Sample mode attaches a Intel PT sample to other events e.g.
perf record --aux-sample -e intel_pt//u -e branch-misses:u
-Snapshot mode captures the available data when a signal is sent e.g.
+Snapshot mode captures the available data when a signal is sent or "snapshot"
+control command is issued. e.g. using a signal
perf record -v -e intel_pt//u -S ./loopy 1000000000 &
[1] 11435
@@ -582,7 +609,23 @@ Note that the signal sent is SIGUSR2.
Note that "Recording AUX area tracing snapshot" is displayed because the -v
option is used.
-The 2 modes cannot be used together.
+The advantage of using "snapshot" control command is that the access is
+controlled by access to a FIFO e.g.
+
+ $ mkfifo perf.control
+ $ mkfifo perf.ack
+ $ cat perf.ack &
+ [1] 15235
+ $ sudo ~/bin/perf record --control fifo:perf.control,perf.ack -S -e intel_pt//u -- sleep 60 &
+ [2] 15243
+ $ ps -e | grep perf
+ 15244 pts/1 00:00:00 perf
+ $ kill -USR2 15244
+ bash: kill: (15244) - Operation not permitted
+ $ echo snapshot > perf.control
+ ack
+
+The 3 Intel PT modes of operation cannot be used together.
Buffer handling
@@ -1093,6 +1136,10 @@ To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g.
perf script --itrace=oe
+XED
+---
+
+include::build-xed.txt[]
SEE ALSO
--------
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 10ed539a8859..4c7db1da8fcc 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -58,6 +58,7 @@ counted. The following modifiers exist:
S - read sample value (PERF_SAMPLE_READ)
D - pin the event to the PMU
W - group is weak and will fallback to non-group if not schedulable,
+ e - group or event are exclusive and do not share the PMU
The 'p' modifier can be used for specifying how precise the instruction
address should be. The 'p' modifier can be specified multiple times:
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index bd50cdff08a8..768888b9326a 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -631,43 +631,45 @@ option. The -e option and this one can be mixed and matched. Events
can be grouped using the {} notation.
endif::HAVE_LIBPFM[]
---control fd:ctl-fd[,ack-fd]
+--control=fifo:ctl-fifo[,ack-fifo]::
+--control=fd:ctl-fd[,ack-fd]::
+ctl-fifo / ack-fifo are opened and used as ctl-fd / ack-fd as follows.
Listen on ctl-fd descriptor for command to control measurement ('enable': enable events,
-'disable': disable events). Measurements can be started with events disabled using
---delay=-1 option. Optionally send control command completion ('ack\n') to ack-fd descriptor
-to synchronize with the controlling process. Example of bash shell script to enable and
-disable events during measurements:
+'disable': disable events, 'snapshot': AUX area tracing snapshot). Measurements can be
+started with events disabled using --delay=-1 option. Optionally send control command
+completion ('ack\n') to ack-fd descriptor to synchronize with the controlling process.
+Example of bash shell script to enable and disable events during measurements:
-#!/bin/bash
+ #!/bin/bash
-ctl_dir=/tmp/
+ ctl_dir=/tmp/
-ctl_fifo=${ctl_dir}perf_ctl.fifo
-test -p ${ctl_fifo} && unlink ${ctl_fifo}
-mkfifo ${ctl_fifo}
-exec {ctl_fd}<>${ctl_fifo}
+ ctl_fifo=${ctl_dir}perf_ctl.fifo
+ test -p ${ctl_fifo} && unlink ${ctl_fifo}
+ mkfifo ${ctl_fifo}
+ exec {ctl_fd}<>${ctl_fifo}
-ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo
-test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo}
-mkfifo ${ctl_ack_fifo}
-exec {ctl_fd_ack}<>${ctl_ack_fifo}
+ ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo
+ test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo}
+ mkfifo ${ctl_ack_fifo}
+ exec {ctl_fd_ack}<>${ctl_ack_fifo}
-perf record -D -1 -e cpu-cycles -a \
- --control fd:${ctl_fd},${ctl_fd_ack} \
- -- sleep 30 &
-perf_pid=$!
+ perf record -D -1 -e cpu-cycles -a \
+ --control fd:${ctl_fd},${ctl_fd_ack} \
+ -- sleep 30 &
+ perf_pid=$!
-sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})"
-sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})"
+ sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})"
+ sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})"
-exec {ctl_fd_ack}>&-
-unlink ${ctl_ack_fifo}
+ exec {ctl_fd_ack}>&-
+ unlink ${ctl_ack_fifo}
-exec {ctl_fd}>&-
-unlink ${ctl_fifo}
+ exec {ctl_fd}>&-
+ unlink ${ctl_fifo}
-wait -n ${perf_pid}
-exit $?
+ wait -n ${perf_pid}
+ exit $?
SEE ALSO
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index db420dd75e43..9f9f29025e49 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -166,6 +166,11 @@ use '-e e1 -e e2 -G foo,foo' or just use '-e e1 -e e2 -G foo'.
If wanting to monitor, say, 'cycles' for a cgroup and also for system wide, this
command line can be used: 'perf stat -e cycles -G cgroup_name -a -e cycles'.
+--for-each-cgroup name::
+Expand event list for each cgroup in "name" (allow multiple cgroups separated
+by comma). This has same effect that repeating -e option and -G option for
+each event x name. This option cannot be used with -G/--cgroup option.
+
-o file::
--output file::
Print the output into the designated file.
@@ -180,43 +185,45 @@ with it. --append may be used here. Examples:
3>results perf stat --log-fd 3 -- $cmd
3>>results perf stat --log-fd 3 --append -- $cmd
---control fd:ctl-fd[,ack-fd]
+--control=fifo:ctl-fifo[,ack-fifo]::
+--control=fd:ctl-fd[,ack-fd]::
+ctl-fifo / ack-fifo are opened and used as ctl-fd / ack-fd as follows.
Listen on ctl-fd descriptor for command to control measurement ('enable': enable events,
'disable': disable events). Measurements can be started with events disabled using
--delay=-1 option. Optionally send control command completion ('ack\n') to ack-fd descriptor
to synchronize with the controlling process. Example of bash shell script to enable and
disable events during measurements:
-#!/bin/bash
+ #!/bin/bash
-ctl_dir=/tmp/
+ ctl_dir=/tmp/
-ctl_fifo=${ctl_dir}perf_ctl.fifo
-test -p ${ctl_fifo} && unlink ${ctl_fifo}
-mkfifo ${ctl_fifo}
-exec {ctl_fd}<>${ctl_fifo}
+ ctl_fifo=${ctl_dir}perf_ctl.fifo
+ test -p ${ctl_fifo} && unlink ${ctl_fifo}
+ mkfifo ${ctl_fifo}
+ exec {ctl_fd}<>${ctl_fifo}
-ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo
-test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo}
-mkfifo ${ctl_ack_fifo}
-exec {ctl_fd_ack}<>${ctl_ack_fifo}
+ ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo
+ test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo}
+ mkfifo ${ctl_ack_fifo}
+ exec {ctl_fd_ack}<>${ctl_ack_fifo}
-perf stat -D -1 -e cpu-cycles -a -I 1000 \
- --control fd:${ctl_fd},${ctl_fd_ack} \
- -- sleep 30 &
-perf_pid=$!
+ perf stat -D -1 -e cpu-cycles -a -I 1000 \
+ --control fd:${ctl_fd},${ctl_fd_ack} \
+ -- sleep 30 &
+ perf_pid=$!
-sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})"
-sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})"
+ sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})"
+ sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})"
-exec {ctl_fd_ack}>&-
-unlink ${ctl_ack_fifo}
+ exec {ctl_fd_ack}>&-
+ unlink ${ctl_ack_fifo}
-exec {ctl_fd}>&-
-unlink ${ctl_fifo}
+ exec {ctl_fd}>&-
+ unlink ${ctl_fifo}
-wait -n ${perf_pid}
-exit $?
+ wait -n ${perf_pid}
+ exit $?
--pre::
@@ -361,6 +368,11 @@ if the workload is actually bound by the CPU and not by something else.
For best results it is usually a good idea to use it with interval
mode like -I 1000, as the bottleneck of workloads can change often.
+This enables --metric-only, unless overridden with --no-metric-only.
+
+The following restrictions only apply to older Intel CPUs and Atom,
+on newer CPUs (IceLake and later) TopDown can be collected for any thread:
+
The top down metrics are collected per core instead of per
CPU thread. Per core mode is automatically enabled
and -a (global monitoring) is needed, requiring root rights or
@@ -372,8 +384,6 @@ echo 0 > /proc/sys/kernel/nmi_watchdog
for best results. Otherwise the bottlenecks may be inconsistent
on workload with changing phases.
-This enables --metric-only, unless overridden with --no-metric-only.
-
To interpret the results it is usually needed to know on which
CPUs the workload runs on. If needed the CPUs can be forced using
taskset.
diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt
index 3f37ded13f8c..c130a3c46a90 100644
--- a/tools/perf/Documentation/perf.txt
+++ b/tools/perf/Documentation/perf.txt
@@ -12,32 +12,57 @@ SYNOPSIS
OPTIONS
-------
---debug::
- Setup debug variable (see list below) in value
- range (0, 10). Use like:
- --debug verbose # sets verbose = 1
- --debug verbose=2 # sets verbose = 2
-
- List of debug variables allowed to set:
- verbose - general debug messages
- ordered-events - ordered events object debug messages
- data-convert - data convert command debug messages
- stderr - write debug output (option -v) to stderr
- in browser mode
- perf-event-open - Print perf_event_open() arguments and
- return value
-
---buildid-dir::
- Setup buildid cache directory. It has higher priority than
- buildid.dir config file option.
+-h::
+--help::
+ Run perf help command.
-v::
--version::
- Display perf version.
+ Display perf version.
--h::
---help::
- Run perf help command.
+-vv::
+ Print the compiled-in status of libraries.
+
+--exec-path::
+ Display or set exec path.
+
+--html-path::
+ Display html documentation path.
+
+-p::
+--paginate::
+ Set up pager.
+
+--no-pager::
+ Do not set pager.
+
+--buildid-dir::
+ Setup buildid cache directory. It has higher priority
+ than buildid.dir config file option.
+
+--list-cmds::
+ List the most commonly used perf commands.
+
+--list-opts::
+ List available perf options.
+
+--debugfs-dir::
+ Set debugfs directory or set environment variable PERF_DEBUGFS_DIR.
+
+--debug::
+ Setup debug variable (see list below) in value
+ range (0, 10). Use like:
+ --debug verbose # sets verbose = 1
+ --debug verbose=2 # sets verbose = 2
+
+ List of debug variables allowed to set:
+ verbose - general debug messages
+ ordered-events - ordered events object debug messages
+ data-convert - data convert command debug messages
+ stderr - write debug output (option -v) to stderr
+ in browser mode
+ perf-event-open - Print perf_event_open() arguments and
+ return value
DESCRIPTION
-----------
diff --git a/tools/perf/Documentation/topdown.txt b/tools/perf/Documentation/topdown.txt
new file mode 100644
index 000000000000..3c39bb3dc5fa
--- /dev/null
+++ b/tools/perf/Documentation/topdown.txt
@@ -0,0 +1,256 @@
+Using TopDown metrics in user space
+-----------------------------------
+
+Intel CPUs (since Sandy Bridge and Silvermont) support a TopDown
+methology to break down CPU pipeline execution into 4 bottlenecks:
+frontend bound, backend bound, bad speculation, retiring.
+
+For more details on Topdown see [1][5]
+
+Traditionally this was implemented by events in generic counters
+and specific formulas to compute the bottlenecks.
+
+perf stat --topdown implements this.
+
+Full Top Down includes more levels that can break down the
+bottlenecks further. This is not directly implemented in perf,
+but available in other tools that can run on top of perf,
+such as toplev[2] or vtune[3]
+
+New Topdown features in Ice Lake
+===============================
+
+With Ice Lake CPUs the TopDown metrics are directly available as
+fixed counters and do not require generic counters. This allows
+to collect TopDown always in addition to other events.
+
+% perf stat -a --topdown -I1000
+# time retiring bad speculation frontend bound backend bound
+ 1.001281330 23.0% 15.3% 29.6% 32.1%
+ 2.003009005 5.0% 6.8% 46.6% 41.6%
+ 3.004646182 6.7% 6.7% 46.0% 40.6%
+ 4.006326375 5.0% 6.4% 47.6% 41.0%
+ 5.007991804 5.1% 6.3% 46.3% 42.3%
+ 6.009626773 6.2% 7.1% 47.3% 39.3%
+ 7.011296356 4.7% 6.7% 46.2% 42.4%
+ 8.012951831 4.7% 6.7% 47.5% 41.1%
+...
+
+This also enables measuring TopDown per thread/process instead
+of only per core.
+
+Using TopDown through RDPMC in applications on Ice Lake
+======================================================
+
+For more fine grained measurements it can be useful to
+access the new directly from user space. This is more complicated,
+but drastically lowers overhead.
+
+On Ice Lake, there is a new fixed counter 3: SLOTS, which reports
+"pipeline SLOTS" (cycles multiplied by core issue width) and a
+metric register that reports slots ratios for the different bottleneck
+categories.
+
+The metrics counter is CPU model specific and is not available on older
+CPUs.
+
+Example code
+============
+
+Library functions to do the functionality described below
+is also available in libjevents [4]
+
+The application opens a group with fixed counter 3 (SLOTS) and any
+metric event, and allow user programs to read the performance counters.
+
+Fixed counter 3 is mapped to a pseudo event event=0x00, umask=04,
+so the perf_event_attr structure should be initialized with
+{ .config = 0x0400, .type = PERF_TYPE_RAW }
+The metric events are mapped to the pseudo event event=0x00, umask=0x8X.
+For example, the perf_event_attr structure can be initialized with
+{ .config = 0x8000, .type = PERF_TYPE_RAW } for Retiring metric event
+The Fixed counter 3 must be the leader of the group.
+
+#include <linux/perf_event.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+/* Provide own perf_event_open stub because glibc doesn't */
+__attribute__((weak))
+int perf_event_open(struct perf_event_attr *attr, pid_t pid,
+ int cpu, int group_fd, unsigned long flags)
+{
+ return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+/* Open slots counter file descriptor for current task. */
+struct perf_event_attr slots = {
+ .type = PERF_TYPE_RAW,
+ .size = sizeof(struct perf_event_attr),
+ .config = 0x400,
+ .exclude_kernel = 1,
+};
+
+int slots_fd = perf_event_open(&slots, 0, -1, -1, 0);
+if (slots_fd < 0)
+ ... error ...
+
+/*
+ * Open metrics event file descriptor for current task.
+ * Set slots event as the leader of the group.
+ */
+struct perf_event_attr metrics = {
+ .type = PERF_TYPE_RAW,
+ .size = sizeof(struct perf_event_attr),
+ .config = 0x8000,
+ .exclude_kernel = 1,
+};
+
+int metrics_fd = perf_event_open(&metrics, 0, -1, slots_fd, 0);
+if (metrics_fd < 0)
+ ... error ...
+
+
+The RDPMC instruction (or _rdpmc compiler intrinsic) can now be used
+to read slots and the topdown metrics at different points of the program:
+
+#include <stdint.h>
+#include <x86intrin.h>
+
+#define RDPMC_FIXED (1 << 30) /* return fixed counters */
+#define RDPMC_METRIC (1 << 29) /* return metric counters */
+
+#define FIXED_COUNTER_SLOTS 3
+#define METRIC_COUNTER_TOPDOWN_L1 0
+
+static inline uint64_t read_slots(void)
+{
+ return _rdpmc(RDPMC_FIXED | FIXED_COUNTER_SLOTS);
+}
+
+static inline uint64_t read_metrics(void)
+{
+ return _rdpmc(RDPMC_METRIC | METRIC_COUNTER_TOPDOWN_L1);
+}
+
+Then the program can be instrumented to read these metrics at different
+points.
+
+It's not a good idea to do this with too short code regions,
+as the parallelism and overlap in the CPU program execution will
+cause too much measurement inaccuracy. For example instrumenting
+individual basic blocks is definitely too fine grained.
+
+Decoding metrics values
+=======================
+
+The value reported by read_metrics() contains four 8 bit fields
+that represent a scaled ratio that represent the Level 1 bottleneck.
+All four fields add up to 0xff (= 100%)
+
+The binary ratios in the metric value can be converted to float ratios:
+
+#define GET_METRIC(m, i) (((m) >> (i*8)) & 0xff)
+
+#define TOPDOWN_RETIRING(val) ((float)GET_METRIC(val, 0) / 0xff)
+#define TOPDOWN_BAD_SPEC(val) ((float)GET_METRIC(val, 1) / 0xff)
+#define TOPDOWN_FE_BOUND(val) ((float)GET_METRIC(val, 2) / 0xff)
+#define TOPDOWN_BE_BOUND(val) ((float)GET_METRIC(val, 3) / 0xff)
+
+and then converted to percent for printing.
+
+The ratios in the metric accumulate for the time when the counter
+is enabled. For measuring programs it is often useful to measure
+specific sections. For this it is needed to deltas on metrics.
+
+This can be done by scaling the metrics with the slots counter
+read at the same time.
+
+Then it's possible to take deltas of these slots counts
+measured at different points, and determine the metrics
+for that time period.
+
+ slots_a = read_slots();
+ metric_a = read_metrics();
+
+ ... larger code region ...
+
+ slots_b = read_slots()
+ metric_b = read_metrics()
+
+ # compute scaled metrics for measurement a
+ retiring_slots_a = GET_METRIC(metric_a, 0) * slots_a
+ bad_spec_slots_a = GET_METRIC(metric_a, 1) * slots_a
+ fe_bound_slots_a = GET_METRIC(metric_a, 2) * slots_a
+ be_bound_slots_a = GET_METRIC(metric_a, 3) * slots_a
+
+ # compute delta scaled metrics between b and a
+ retiring_slots = GET_METRIC(metric_b, 0) * slots_b - retiring_slots_a
+ bad_spec_slots = GET_METRIC(metric_b, 1) * slots_b - bad_spec_slots_a
+ fe_bound_slots = GET_METRIC(metric_b, 2) * slots_b - fe_bound_slots_a
+ be_bound_slots = GET_METRIC(metric_b, 3) * slots_b - be_bound_slots_a
+
+Later the individual ratios for the measurement period can be recreated
+from these counts.
+
+ slots_delta = slots_b - slots_a
+ retiring_ratio = (float)retiring_slots / slots_delta
+ bad_spec_ratio = (float)bad_spec_slots / slots_delta
+ fe_bound_ratio = (float)fe_bound_slots / slots_delta
+ be_bound_ratio = (float)be_bound_slots / slota_delta
+
+ printf("Retiring %.2f%% Bad Speculation %.2f%% FE Bound %.2f%% BE Bound %.2f%%\n",
+ retiring_ratio * 100.,
+ bad_spec_ratio * 100.,
+ fe_bound_ratio * 100.,
+ be_bound_ratio * 100.);
+
+Resetting metrics counters
+==========================
+
+Since the individual metrics are only 8bit they lose precision for
+short regions over time because the number of cycles covered by each
+fraction bit shrinks. So the counters need to be reset regularly.
+
+When using the kernel perf API the kernel resets on every read.
+So as long as the reading is at reasonable intervals (every few
+seconds) the precision is good.
+
+When using perf stat it is recommended to always use the -I option,
+with no longer interval than a few seconds
+
+ perf stat -I 1000 --topdown ...
+
+For user programs using RDPMC directly the counter can
+be reset explicitly using ioctl:
+
+ ioctl(perf_fd, PERF_EVENT_IOC_RESET, 0);
+
+This "opens" a new measurement period.
+
+A program using RDPMC for TopDown should schedule such a reset
+regularly, as in every few seconds.
+
+Limits on Ice Lake
+==================
+
+Four pseudo TopDown metric events are exposed for the end-users,
+topdown-retiring, topdown-bad-spec, topdown-fe-bound and topdown-be-bound.
+They can be used to collect the TopDown value under the following
+rules:
+- All the TopDown metric events must be in a group with the SLOTS event.
+- The SLOTS event must be the leader of the group.
+- The PERF_FORMAT_GROUP flag must be applied for each TopDown metric
+ events
+
+The SLOTS event and the TopDown metric events can be counting members of
+a sampling read group. Since the SLOTS event must be the leader of a TopDown
+group, the second event of the group is the sampling event.
+For example, perf record -e '{slots, $sampling_event, topdown-retiring}:S'
+
+
+[1] https://software.intel.com/en-us/top-down-microarchitecture-analysis-method-win
+[2] https://github.com/andikleen/pmu-tools/wiki/toplev-manual
+[3] https://software.intel.com/en-us/intel-vtune-amplifier-xe
+[4] https://github.com/andikleen/pmu-tools/tree/master/jevents
+[5] https://sites.google.com/site/analysismethods/yasin-pubs
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index d1b5fb89992f..6890fc4b063a 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -223,14 +223,17 @@ endif
# Try different combinations to accommodate systems that only have
# python[2][-config] in weird combinations but always preferring
-# python2 and python2-config as per pep-0394. If we catch a
-# python[-config] in version 3, the version check will kill it.
-PYTHON2 := $(if $(call get-executable,python2),python2,python)
-override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON2))
-PYTHON2_CONFIG := \
+# python2 and python2-config as per pep-0394. If python2 or python
+# aren't found, then python3 is used.
+PYTHON_AUTO := python
+PYTHON_AUTO := $(if $(call get-executable,python3),python3,$(PYTHON_AUTO))
+PYTHON_AUTO := $(if $(call get-executable,python),python,$(PYTHON_AUTO))
+PYTHON_AUTO := $(if $(call get-executable,python2),python2,$(PYTHON_AUTO))
+override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON_AUTO))
+PYTHON_AUTO_CONFIG := \
$(if $(call get-executable,$(PYTHON)-config),$(PYTHON)-config,python-config)
override PYTHON_CONFIG := \
- $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG))
+ $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON_AUTO_CONFIG))
grep-libs = $(filter -l%,$(1))
strip-libs = $(filter-out -l%,$(1))
@@ -720,12 +723,14 @@ ifndef NO_SLANG
endif
endif
-ifndef NO_GTK2
+ifdef GTK2
FLAGS_GTK2=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null)
+ $(call feature_check,gtk2)
ifneq ($(feature-gtk2), 1)
msg := $(warning GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev);
NO_GTK2 := 1
else
+ $(call feature_check,gtk2-infobar)
ifeq ($(feature-gtk2-infobar), 1)
GTK_CFLAGS := -DHAVE_GTK_INFO_BAR_SUPPORT
endif
@@ -821,6 +826,12 @@ else
$(call feature_check,disassembler-four-args)
endif
+ifeq ($(feature-libbfd-buildid), 1)
+ CFLAGS += -DHAVE_LIBBFD_BUILDID_SUPPORT
+else
+ msg := $(warning Old version of libbfd/binutils things like PE executable profiling will not be available);
+endif
+
ifdef NO_DEMANGLE
CFLAGS += -DNO_DEMANGLE
else
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 6031167939ae..7ce3f2e8b9c7 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -48,7 +48,7 @@ include ../scripts/utilities.mak
#
# Define NO_SLANG if you do not want TUI support.
#
-# Define NO_GTK2 if you do not want GTK+ GUI support.
+# Define GTK2 if you want GTK+ GUI support.
#
# Define NO_DEMANGLE if you do not want C++ symbol demangling.
#
@@ -386,7 +386,7 @@ ifneq ($(OUTPUT),)
CFLAGS += -I$(OUTPUT)
endif
-ifndef NO_GTK2
+ifdef GTK2
ALL_PROGRAMS += $(OUTPUT)libperf-gtk.so
GTK_IN := $(OUTPUT)gtk-in.o
endif
@@ -537,6 +537,12 @@ mmap_flags_tbl := $(srctree)/tools/perf/trace/beauty/mmap_flags.sh
$(mmap_flags_array): $(linux_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(mmap_flags_tbl)
$(Q)$(SHELL) '$(mmap_flags_tbl)' $(linux_uapi_dir) $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@
+mremap_flags_array := $(beauty_outdir)/mremap_flags_array.c
+mremap_flags_tbl := $(srctree)/tools/perf/trace/beauty/mremap_flags.sh
+
+$(mremap_flags_array): $(linux_uapi_dir)/mman.h $(mremap_flags_tbl)
+ $(Q)$(SHELL) '$(mremap_flags_tbl)' $(linux_uapi_dir) > $@
+
mount_flags_array := $(beauty_outdir)/mount_flags_array.c
mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/mount_flags.sh
@@ -549,6 +555,13 @@ move_mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/move_mount_flags.sh
$(move_mount_flags_array): $(linux_uapi_dir)/fs.h $(move_mount_flags_tbl)
$(Q)$(SHELL) '$(move_mount_flags_tbl)' $(linux_uapi_dir) > $@
+
+mmap_prot_array := $(beauty_outdir)/mmap_prot_array.c
+mmap_prot_tbl := $(srctree)/tools/perf/trace/beauty/mmap_prot.sh
+
+$(mmap_prot_array): $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(mmap_prot_tbl)
+ $(Q)$(SHELL) '$(mmap_prot_tbl)' $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@
+
prctl_option_array := $(beauty_outdir)/prctl_option_array.c
prctl_hdr_dir := $(srctree)/tools/include/uapi/linux/
prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh
@@ -710,6 +723,8 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
$(vhost_virtio_ioctl_array) \
$(madvise_behavior_array) \
$(mmap_flags_array) \
+ $(mmap_prot_array) \
+ $(mremap_flags_array) \
$(mount_flags_array) \
$(move_mount_flags_array) \
$(perf_ioctl_array) \
@@ -886,7 +901,7 @@ check: $(OUTPUT)common-cmds.h
### Installation rules
-ifndef NO_GTK2
+ifdef GTK2
install-gtk: $(OUTPUT)libperf-gtk.so
$(call QUIET_INSTALL, 'GTK UI') \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(libdir_SQ)'; \
@@ -961,6 +976,7 @@ install-tests: all install-gtk
$(call QUIET_INSTALL, tests) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
$(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
+ $(INSTALL) tests/pe-file.exe* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
$(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \
@@ -1007,6 +1023,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
$(OUTPUT)$(fspick_arrays) \
$(OUTPUT)$(madvise_behavior_array) \
$(OUTPUT)$(mmap_flags_array) \
+ $(OUTPUT)$(mmap_prot_array) \
+ $(OUTPUT)$(mremap_flags_array) \
$(OUTPUT)$(mount_flags_array) \
$(OUTPUT)$(move_mount_flags_array) \
$(OUTPUT)$(drm_ioctl_array) \
diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
index 5c13438c7bd4..b53294d74b01 100644
--- a/tools/perf/arch/arm64/util/Build
+++ b/tools/perf/arch/arm64/util/Build
@@ -1,6 +1,7 @@
perf-y += header.o
perf-y += machine.o
perf-y += perf_regs.o
+perf-y += tsc.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/arch/arm64/util/tsc.c b/tools/perf/arch/arm64/util/tsc.c
new file mode 100644
index 000000000000..cc85bd9e73f1
--- /dev/null
+++ b/tools/perf/arch/arm64/util/tsc.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/types.h>
+
+#include "../../../util/tsc.h"
+
+u64 rdtsc(void)
+{
+ u64 val;
+
+ /*
+ * According to ARM DDI 0487F.c, from Armv8.0 to Armv8.5 inclusive, the
+ * system counter is at least 56 bits wide; from Armv8.6, the counter
+ * must be 64 bits wide. So the system counter could be less than 64
+ * bits wide and it is attributed with the flag 'cap_user_time_short'
+ * is true.
+ */
+ asm volatile("mrs %0, cntvct_el0" : "=r" (val));
+
+ return val;
+}
diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c
index 1a950171a66f..58b2d610aadb 100644
--- a/tools/perf/arch/powerpc/util/header.c
+++ b/tools/perf/arch/powerpc/util/header.c
@@ -40,8 +40,11 @@ get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
return bufp;
}
-int arch_get_runtimeparam(void)
+int arch_get_runtimeparam(struct pmu_event *pe)
{
int count;
- return sysfs__read_int("/devices/hv_24x7/interface/sockets", &count) < 0 ? 1 : count;
+ char path[PATH_MAX] = "/devices/hv_24x7/interface/";
+
+ atoi(pe->aggr_mode) == PerChip ? strcat(path, "sockets") : strcat(path, "coresperchip");
+ return sysfs__read_int(path, &count) < 0 ? 1 : count;
}
diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c
index 7eb5621c021d..24ea12ec7e02 100644
--- a/tools/perf/arch/x86/annotate/instructions.c
+++ b/tools/perf/arch/x86/annotate/instructions.c
@@ -110,6 +110,7 @@ static struct ins x86__instructions[] = {
{ .name = "por", .ops = &mov_ops, },
{ .name = "rclb", .ops = &mov_ops, },
{ .name = "rcll", .ops = &mov_ops, },
+ { .name = "ret", .ops = &ret_ops, },
{ .name = "retq", .ops = &ret_ops, },
{ .name = "sbb", .ops = &mov_ops, },
{ .name = "sbbl", .ops = &mov_ops, },
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index 47f9c56e744f..347c39b960eb 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -3,7 +3,7 @@ perf-y += tsc.o
perf-y += pmu.o
perf-y += kvm-stat.o
perf-y += perf_regs.o
-perf-y += group.o
+perf-y += topdown.o
perf-y += machine.o
perf-y += event.o
diff --git a/tools/perf/arch/x86/util/group.c b/tools/perf/arch/x86/util/group.c
deleted file mode 100644
index e2f8034b8973..000000000000
--- a/tools/perf/arch/x86/util/group.c
+++ /dev/null
@@ -1,28 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include "api/fs/fs.h"
-#include "util/group.h"
-
-/*
- * Check whether we can use a group for top down.
- * Without a group may get bad results due to multiplexing.
- */
-bool arch_topdown_check_group(bool *warn)
-{
- int n;
-
- if (sysctl__read_int("kernel/nmi_watchdog", &n) < 0)
- return false;
- if (n > 0) {
- *warn = true;
- return false;
- }
- return true;
-}
-
-void arch_topdown_group_warn(void)
-{
- fprintf(stderr,
- "nmi_watchdog enabled with topdown. May give wrong results.\n"
- "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n");
-}
diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c
new file mode 100644
index 000000000000..2f3d96aa92a5
--- /dev/null
+++ b/tools/perf/arch/x86/util/topdown.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include "api/fs/fs.h"
+#include "util/pmu.h"
+#include "util/topdown.h"
+
+/*
+ * Check whether we can use a group for top down.
+ * Without a group may get bad results due to multiplexing.
+ */
+bool arch_topdown_check_group(bool *warn)
+{
+ int n;
+
+ if (sysctl__read_int("kernel/nmi_watchdog", &n) < 0)
+ return false;
+ if (n > 0) {
+ *warn = true;
+ return false;
+ }
+ return true;
+}
+
+void arch_topdown_group_warn(void)
+{
+ fprintf(stderr,
+ "nmi_watchdog enabled with topdown. May give wrong results.\n"
+ "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n");
+}
+
+#define TOPDOWN_SLOTS 0x0400
+
+static bool is_topdown_slots_event(struct evsel *counter)
+{
+ if (!counter->pmu_name)
+ return false;
+
+ if (strcmp(counter->pmu_name, "cpu"))
+ return false;
+
+ if (counter->core.attr.config == TOPDOWN_SLOTS)
+ return true;
+
+ return false;
+}
+
+/*
+ * Check whether a topdown group supports sample-read.
+ *
+ * Only Topdown metic supports sample-read. The slots
+ * event must be the leader of the topdown group.
+ */
+
+bool arch_topdown_sample_read(struct evsel *leader)
+{
+ if (!pmu_have_event("cpu", "slots"))
+ return false;
+
+ if (is_topdown_slots_event(leader))
+ return true;
+
+ return false;
+}
diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c
index 2f55afb14e1f..559365f8fe52 100644
--- a/tools/perf/arch/x86/util/tsc.c
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -1,45 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
-#include <stdbool.h>
-#include <errno.h>
-
-#include <linux/stddef.h>
-#include <linux/perf_event.h>
-
#include <linux/types.h>
-#include <asm/barrier.h>
-#include "../../../util/debug.h"
-#include "../../../util/event.h"
-#include "../../../util/synthetic-events.h"
-#include "../../../util/tsc.h"
-
-int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
- struct perf_tsc_conversion *tc)
-{
- bool cap_user_time_zero;
- u32 seq;
- int i = 0;
-
- while (1) {
- seq = pc->lock;
- rmb();
- tc->time_mult = pc->time_mult;
- tc->time_shift = pc->time_shift;
- tc->time_zero = pc->time_zero;
- cap_user_time_zero = pc->cap_user_time_zero;
- rmb();
- if (pc->lock == seq && !(seq & 1))
- break;
- if (++i > 10000) {
- pr_debug("failed to get perf_event_mmap_page lock\n");
- return -EINVAL;
- }
- }
- if (!cap_user_time_zero)
- return -EOPNOTSUPP;
-
- return 0;
-}
+#include "../../../util/tsc.h"
u64 rdtsc(void)
{
@@ -49,36 +11,3 @@ u64 rdtsc(void)
return low | ((u64)high) << 32;
}
-
-int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
- struct perf_tool *tool,
- perf_event__handler_t process,
- struct machine *machine)
-{
- union perf_event event = {
- .time_conv = {
- .header = {
- .type = PERF_RECORD_TIME_CONV,
- .size = sizeof(struct perf_record_time_conv),
- },
- },
- };
- struct perf_tsc_conversion tc;
- int err;
-
- if (!pc)
- return 0;
- err = perf_read_tsc_conversion(pc, &tc);
- if (err == -EOPNOTSUPP)
- return 0;
- if (err)
- return err;
-
- pr_debug2("Synthesizing TSC conversion information\n");
-
- event.time_conv.time_mult = tc.time_mult;
- event.time_conv.time_shift = tc.time_shift;
- event.time_conv.time_zero = tc.time_zero;
-
- return process(tool, &event, NULL, machine);
-}
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index 878db6a59a41..e43f46931b41 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -12,6 +12,7 @@ perf-y += epoll-ctl.o
perf-y += synthesize.o
perf-y += kallsyms-parse.o
perf-y += find-bit-bench.o
+perf-y += inject-buildid.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 2804812d4154..eac36afab2b3 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -47,6 +47,7 @@ int bench_epoll_wait(int argc, const char **argv);
int bench_epoll_ctl(int argc, const char **argv);
int bench_synthesize(int argc, const char **argv);
int bench_kallsyms_parse(int argc, const char **argv);
+int bench_inject_build_id(int argc, const char **argv);
#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c
new file mode 100644
index 000000000000..280227e3ffd7
--- /dev/null
+++ b/tools/perf/bench/inject-buildid.c
@@ -0,0 +1,476 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include <stddef.h>
+#include <ftw.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <linux/kernel.h>
+#include <linux/time64.h>
+#include <linux/list.h>
+#include <linux/err.h>
+#include <internal/lib.h>
+#include <subcmd/parse-options.h>
+
+#include "bench.h"
+#include "util/data.h"
+#include "util/stat.h"
+#include "util/debug.h"
+#include "util/event.h"
+#include "util/symbol.h"
+#include "util/session.h"
+#include "util/build-id.h"
+#include "util/synthetic-events.h"
+
+#define MMAP_DEV_MAJOR 8
+#define DSO_MMAP_RATIO 4
+
+static unsigned int iterations = 100;
+static unsigned int nr_mmaps = 100;
+static unsigned int nr_samples = 100; /* samples per mmap */
+
+static u64 bench_sample_type;
+static u16 bench_id_hdr_size;
+
+struct bench_data {
+ int pid;
+ int input_pipe[2];
+ int output_pipe[2];
+ pthread_t th;
+};
+
+struct bench_dso {
+ struct list_head list;
+ char *name;
+ int ino;
+};
+
+static int nr_dsos;
+static struct bench_dso *dsos;
+
+extern int cmd_inject(int argc, const char *argv[]);
+
+static const struct option options[] = {
+ OPT_UINTEGER('i', "iterations", &iterations,
+ "Number of iterations used to compute average (default: 100)"),
+ OPT_UINTEGER('m', "nr-mmaps", &nr_mmaps,
+ "Number of mmap events for each iteration (default: 100)"),
+ OPT_UINTEGER('n', "nr-samples", &nr_samples,
+ "Number of sample events per mmap event (default: 100)"),
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show iteration count, DSO name, etc)"),
+ OPT_END()
+};
+
+static const char *const bench_usage[] = {
+ "perf bench internals inject-build-id <options>",
+ NULL
+};
+
+/*
+ * Helper for collect_dso that adds the given file as a dso to dso_list
+ * if it contains a build-id. Stops after collecting 4 times more than
+ * we need (for MMAP2 events).
+ */
+static int add_dso(const char *fpath, const struct stat *sb __maybe_unused,
+ int typeflag, struct FTW *ftwbuf __maybe_unused)
+{
+ struct bench_dso *dso = &dsos[nr_dsos];
+ struct build_id bid;
+
+ if (typeflag == FTW_D || typeflag == FTW_SL)
+ return 0;
+
+ if (filename__read_build_id(fpath, &bid) < 0)
+ return 0;
+
+ dso->name = realpath(fpath, NULL);
+ if (dso->name == NULL)
+ return -1;
+
+ dso->ino = nr_dsos++;
+ pr_debug2(" Adding DSO: %s\n", fpath);
+
+ /* stop if we collected enough DSOs */
+ if ((unsigned int)nr_dsos == DSO_MMAP_RATIO * nr_mmaps)
+ return 1;
+
+ return 0;
+}
+
+static void collect_dso(void)
+{
+ dsos = calloc(nr_mmaps * DSO_MMAP_RATIO, sizeof(*dsos));
+ if (dsos == NULL) {
+ printf(" Memory allocation failed\n");
+ exit(1);
+ }
+
+ if (nftw("/usr/lib/", add_dso, 10, FTW_PHYS) < 0)
+ return;
+
+ pr_debug(" Collected %d DSOs\n", nr_dsos);
+}
+
+static void release_dso(void)
+{
+ int i;
+
+ for (i = 0; i < nr_dsos; i++) {
+ struct bench_dso *dso = &dsos[i];
+
+ free(dso->name);
+ }
+ free(dsos);
+}
+
+/* Fake address used by mmap and sample events */
+static u64 dso_map_addr(struct bench_dso *dso)
+{
+ return 0x400000ULL + dso->ino * 8192ULL;
+}
+
+static u32 synthesize_attr(struct bench_data *data)
+{
+ union perf_event event;
+
+ memset(&event, 0, sizeof(event.attr) + sizeof(u64));
+
+ event.header.type = PERF_RECORD_HEADER_ATTR;
+ event.header.size = sizeof(event.attr) + sizeof(u64);
+
+ event.attr.attr.type = PERF_TYPE_SOFTWARE;
+ event.attr.attr.config = PERF_COUNT_SW_TASK_CLOCK;
+ event.attr.attr.exclude_kernel = 1;
+ event.attr.attr.sample_id_all = 1;
+ event.attr.attr.sample_type = bench_sample_type;
+
+ return writen(data->input_pipe[1], &event, event.header.size);
+}
+
+static u32 synthesize_fork(struct bench_data *data)
+{
+ union perf_event event;
+
+ memset(&event, 0, sizeof(event.fork) + bench_id_hdr_size);
+
+ event.header.type = PERF_RECORD_FORK;
+ event.header.misc = PERF_RECORD_MISC_FORK_EXEC;
+ event.header.size = sizeof(event.fork) + bench_id_hdr_size;
+
+ event.fork.ppid = 1;
+ event.fork.ptid = 1;
+ event.fork.pid = data->pid;
+ event.fork.tid = data->pid;
+
+ return writen(data->input_pipe[1], &event, event.header.size);
+}
+
+static u32 synthesize_mmap(struct bench_data *data, struct bench_dso *dso,
+ u64 timestamp)
+{
+ union perf_event event;
+ size_t len = offsetof(struct perf_record_mmap2, filename);
+ u64 *id_hdr_ptr = (void *)&event;
+ int ts_idx;
+
+ len += roundup(strlen(dso->name) + 1, 8) + bench_id_hdr_size;
+
+ memset(&event, 0, min(len, sizeof(event.mmap2)));
+
+ event.header.type = PERF_RECORD_MMAP2;
+ event.header.misc = PERF_RECORD_MISC_USER;
+ event.header.size = len;
+
+ event.mmap2.pid = data->pid;
+ event.mmap2.tid = data->pid;
+ event.mmap2.maj = MMAP_DEV_MAJOR;
+ event.mmap2.ino = dso->ino;
+
+ strcpy(event.mmap2.filename, dso->name);
+
+ event.mmap2.start = dso_map_addr(dso);
+ event.mmap2.len = 4096;
+ event.mmap2.prot = PROT_EXEC;
+
+ if (len > sizeof(event.mmap2)) {
+ /* write mmap2 event first */
+ writen(data->input_pipe[1], &event, len - bench_id_hdr_size);
+ /* zero-fill sample id header */
+ memset(id_hdr_ptr, 0, bench_id_hdr_size);
+ /* put timestamp in the right position */
+ ts_idx = (bench_id_hdr_size / sizeof(u64)) - 2;
+ id_hdr_ptr[ts_idx] = timestamp;
+ writen(data->input_pipe[1], id_hdr_ptr, bench_id_hdr_size);
+ } else {
+ ts_idx = (len / sizeof(u64)) - 2;
+ id_hdr_ptr[ts_idx] = timestamp;
+ writen(data->input_pipe[1], &event, len);
+ }
+ return len;
+}
+
+static u32 synthesize_sample(struct bench_data *data, struct bench_dso *dso,
+ u64 timestamp)
+{
+ union perf_event event;
+ struct perf_sample sample = {
+ .tid = data->pid,
+ .pid = data->pid,
+ .ip = dso_map_addr(dso),
+ .time = timestamp,
+ };
+
+ event.header.type = PERF_RECORD_SAMPLE;
+ event.header.misc = PERF_RECORD_MISC_USER;
+ event.header.size = perf_event__sample_event_size(&sample, bench_sample_type, 0);
+
+ perf_event__synthesize_sample(&event, bench_sample_type, 0, &sample);
+
+ return writen(data->input_pipe[1], &event, event.header.size);
+}
+
+static u32 synthesize_flush(struct bench_data *data)
+{
+ struct perf_event_header header = {
+ .size = sizeof(header),
+ .type = PERF_RECORD_FINISHED_ROUND,
+ };
+
+ return writen(data->input_pipe[1], &header, header.size);
+}
+
+static void *data_reader(void *arg)
+{
+ struct bench_data *data = arg;
+ char buf[8192];
+ int flag;
+ int n;
+
+ flag = fcntl(data->output_pipe[0], F_GETFL);
+ fcntl(data->output_pipe[0], F_SETFL, flag | O_NONBLOCK);
+
+ /* read out data from child */
+ while (true) {
+ n = read(data->output_pipe[0], buf, sizeof(buf));
+ if (n > 0)
+ continue;
+ if (n == 0)
+ break;
+
+ if (errno != EINTR && errno != EAGAIN)
+ break;
+
+ usleep(100);
+ }
+
+ close(data->output_pipe[0]);
+ return NULL;
+}
+
+static int setup_injection(struct bench_data *data, bool build_id_all)
+{
+ int ready_pipe[2];
+ int dev_null_fd;
+ char buf;
+
+ if (pipe(ready_pipe) < 0)
+ return -1;
+
+ if (pipe(data->input_pipe) < 0)
+ return -1;
+
+ if (pipe(data->output_pipe) < 0)
+ return -1;
+
+ data->pid = fork();
+ if (data->pid < 0)
+ return -1;
+
+ if (data->pid == 0) {
+ const char **inject_argv;
+ int inject_argc = 2;
+
+ close(data->input_pipe[1]);
+ close(data->output_pipe[0]);
+ close(ready_pipe[0]);
+
+ dup2(data->input_pipe[0], STDIN_FILENO);
+ close(data->input_pipe[0]);
+ dup2(data->output_pipe[1], STDOUT_FILENO);
+ close(data->output_pipe[1]);
+
+ dev_null_fd = open("/dev/null", O_WRONLY);
+ if (dev_null_fd < 0)
+ exit(1);
+
+ dup2(dev_null_fd, STDERR_FILENO);
+
+ if (build_id_all)
+ inject_argc++;
+
+ inject_argv = calloc(inject_argc + 1, sizeof(*inject_argv));
+ if (inject_argv == NULL)
+ exit(1);
+
+ inject_argv[0] = strdup("inject");
+ inject_argv[1] = strdup("-b");
+ if (build_id_all)
+ inject_argv[2] = strdup("--buildid-all");
+
+ /* signal that we're ready to go */
+ close(ready_pipe[1]);
+
+ cmd_inject(inject_argc, inject_argv);
+
+ exit(0);
+ }
+
+ pthread_create(&data->th, NULL, data_reader, data);
+
+ close(ready_pipe[1]);
+ close(data->input_pipe[0]);
+ close(data->output_pipe[1]);
+
+ /* wait for child ready */
+ if (read(ready_pipe[0], &buf, 1) < 0)
+ return -1;
+ close(ready_pipe[0]);
+
+ return 0;
+}
+
+static int inject_build_id(struct bench_data *data, u64 *max_rss)
+{
+ int status;
+ unsigned int i, k;
+ struct rusage rusage;
+ u64 len = 0;
+
+ /* this makes the child to run */
+ if (perf_header__write_pipe(data->input_pipe[1]) < 0)
+ return -1;
+
+ len += synthesize_attr(data);
+ len += synthesize_fork(data);
+
+ for (i = 0; i < nr_mmaps; i++) {
+ int idx = rand() % (nr_dsos - 1);
+ struct bench_dso *dso = &dsos[idx];
+ u64 timestamp = rand() % 1000000;
+
+ pr_debug2(" [%d] injecting: %s\n", i+1, dso->name);
+ len += synthesize_mmap(data, dso, timestamp);
+
+ for (k = 0; k < nr_samples; k++)
+ len += synthesize_sample(data, dso, timestamp + k * 1000);
+
+ if ((i + 1) % 10 == 0)
+ len += synthesize_flush(data);
+ }
+
+ /* tihs makes the child to finish */
+ close(data->input_pipe[1]);
+
+ wait4(data->pid, &status, 0, &rusage);
+ *max_rss = rusage.ru_maxrss;
+
+ pr_debug(" Child %d exited with %d\n", data->pid, status);
+
+ return 0;
+}
+
+static void do_inject_loop(struct bench_data *data, bool build_id_all)
+{
+ unsigned int i;
+ struct stats time_stats, mem_stats;
+ double time_average, time_stddev;
+ double mem_average, mem_stddev;
+
+ init_stats(&time_stats);
+ init_stats(&mem_stats);
+
+ pr_debug(" Build-id%s injection benchmark\n", build_id_all ? "-all" : "");
+
+ for (i = 0; i < iterations; i++) {
+ struct timeval start, end, diff;
+ u64 runtime_us, max_rss;
+
+ pr_debug(" Iteration #%d\n", i+1);
+
+ if (setup_injection(data, build_id_all) < 0) {
+ printf(" Build-id injection setup failed\n");
+ break;
+ }
+
+ gettimeofday(&start, NULL);
+ if (inject_build_id(data, &max_rss) < 0) {
+ printf(" Build-id injection failed\n");
+ break;
+ }
+
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &diff);
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ update_stats(&time_stats, runtime_us);
+ update_stats(&mem_stats, max_rss);
+
+ pthread_join(data->th, NULL);
+ }
+
+ time_average = avg_stats(&time_stats) / USEC_PER_MSEC;
+ time_stddev = stddev_stats(&time_stats) / USEC_PER_MSEC;
+ printf(" Average build-id%s injection took: %.3f msec (+- %.3f msec)\n",
+ build_id_all ? "-all" : "", time_average, time_stddev);
+
+ /* each iteration, it processes MMAP2 + BUILD_ID + nr_samples * SAMPLE */
+ time_average = avg_stats(&time_stats) / (nr_mmaps * (nr_samples + 2));
+ time_stddev = stddev_stats(&time_stats) / (nr_mmaps * (nr_samples + 2));
+ printf(" Average time per event: %.3f usec (+- %.3f usec)\n",
+ time_average, time_stddev);
+
+ mem_average = avg_stats(&mem_stats);
+ mem_stddev = stddev_stats(&mem_stats);
+ printf(" Average memory usage: %.0f KB (+- %.0f KB)\n",
+ mem_average, mem_stddev);
+}
+
+static int do_inject_loops(struct bench_data *data)
+{
+
+ srand(time(NULL));
+ symbol__init(NULL);
+
+ bench_sample_type = PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP;
+ bench_sample_type |= PERF_SAMPLE_TID | PERF_SAMPLE_TIME;
+ bench_id_hdr_size = 32;
+
+ collect_dso();
+ if (nr_dsos == 0) {
+ printf(" Cannot collect DSOs for injection\n");
+ return -1;
+ }
+
+ do_inject_loop(data, false);
+ do_inject_loop(data, true);
+
+ release_dso();
+ return 0;
+}
+
+int bench_inject_build_id(int argc, const char **argv)
+{
+ struct bench_data data;
+
+ argc = parse_options(argc, argv, options, bench_usage, 0);
+ if (argc) {
+ usage_with_options(bench_usage, options);
+ exit(EXIT_FAILURE);
+ }
+
+ return do_inject_loops(&data);
+}
+
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index f85bceccc459..11726ec6285f 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -137,12 +137,13 @@ struct global_info {
u8 *data;
pthread_mutex_t startup_mutex;
+ pthread_cond_t startup_cond;
int nr_tasks_started;
- pthread_mutex_t startup_done_mutex;
-
pthread_mutex_t start_work_mutex;
+ pthread_cond_t start_work_cond;
int nr_tasks_working;
+ bool start_work;
pthread_mutex_t stop_work_mutex;
u64 bytes_done;
@@ -483,6 +484,18 @@ static void init_global_mutex(pthread_mutex_t *mutex)
pthread_mutex_init(mutex, &attr);
}
+/*
+ * Return a process-shared (global) condition variable:
+ */
+static void init_global_cond(pthread_cond_t *cond)
+{
+ pthread_condattr_t attr;
+
+ pthread_condattr_init(&attr);
+ pthread_condattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
+ pthread_cond_init(cond, &attr);
+}
+
static int parse_cpu_list(const char *arg)
{
p0.cpu_list_str = strdup(arg);
@@ -1136,15 +1149,18 @@ static void *worker_thread(void *__tdata)
if (g->p.serialize_startup) {
pthread_mutex_lock(&g->startup_mutex);
g->nr_tasks_started++;
+ /* The last thread wakes the main process. */
+ if (g->nr_tasks_started == g->p.nr_tasks)
+ pthread_cond_signal(&g->startup_cond);
+
pthread_mutex_unlock(&g->startup_mutex);
/* Here we will wait for the main process to start us all at once: */
pthread_mutex_lock(&g->start_work_mutex);
+ g->start_work = false;
g->nr_tasks_working++;
-
- /* Last one wake the main process: */
- if (g->nr_tasks_working == g->p.nr_tasks)
- pthread_mutex_unlock(&g->startup_done_mutex);
+ while (!g->start_work)
+ pthread_cond_wait(&g->start_work_cond, &g->start_work_mutex);
pthread_mutex_unlock(&g->start_work_mutex);
}
@@ -1441,8 +1457,9 @@ static int init(void)
/* Startup serialization: */
init_global_mutex(&g->start_work_mutex);
+ init_global_cond(&g->start_work_cond);
init_global_mutex(&g->startup_mutex);
- init_global_mutex(&g->startup_done_mutex);
+ init_global_cond(&g->startup_cond);
init_global_mutex(&g->stop_work_mutex);
init_thread_data();
@@ -1502,9 +1519,6 @@ static int __bench_numa(const char *name)
pids = zalloc(g->p.nr_proc * sizeof(*pids));
pid = -1;
- /* All threads try to acquire it, this way we can wait for them to start up: */
- pthread_mutex_lock(&g->start_work_mutex);
-
if (g->p.serialize_startup) {
tprintf(" #\n");
tprintf(" # Startup synchronization: ..."); fflush(stdout);
@@ -1526,22 +1540,29 @@ static int __bench_numa(const char *name)
pids[i] = pid;
}
- /* Wait for all the threads to start up: */
- while (g->nr_tasks_started != g->p.nr_tasks)
- usleep(USEC_PER_MSEC);
-
- BUG_ON(g->nr_tasks_started != g->p.nr_tasks);
if (g->p.serialize_startup) {
+ bool threads_ready = false;
double startup_sec;
- pthread_mutex_lock(&g->startup_done_mutex);
+ /*
+ * Wait for all the threads to start up. The last thread will
+ * signal this process.
+ */
+ pthread_mutex_lock(&g->startup_mutex);
+ while (g->nr_tasks_started != g->p.nr_tasks)
+ pthread_cond_wait(&g->startup_cond, &g->startup_mutex);
- /* This will start all threads: */
- pthread_mutex_unlock(&g->start_work_mutex);
+ pthread_mutex_unlock(&g->startup_mutex);
- /* This mutex is locked - the last started thread will wake us: */
- pthread_mutex_lock(&g->startup_done_mutex);
+ /* Wait for all threads to be at the start_work_cond. */
+ while (!threads_ready) {
+ pthread_mutex_lock(&g->start_work_mutex);
+ threads_ready = (g->nr_tasks_working == g->p.nr_tasks);
+ pthread_mutex_unlock(&g->start_work_mutex);
+ if (!threads_ready)
+ usleep(1);
+ }
gettimeofday(&stop, NULL);
@@ -1555,7 +1576,11 @@ static int __bench_numa(const char *name)
tprintf(" #\n");
start = stop;
- pthread_mutex_unlock(&g->startup_done_mutex);
+ /* Start all threads running. */
+ pthread_mutex_lock(&g->start_work_mutex);
+ g->start_work = true;
+ pthread_mutex_unlock(&g->start_work_mutex);
+ pthread_cond_broadcast(&g->start_work_cond);
} else {
gettimeofday(&start, NULL);
}
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index 4f176039fc8f..62a7b7420a44 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -87,6 +87,7 @@ static struct bench epoll_benchmarks[] = {
static struct bench internals_benchmarks[] = {
{ "synthesize", "Benchmark perf event synthesis", bench_synthesize },
{ "kallsyms-parse", "Benchmark kallsyms parsing", bench_kallsyms_parse },
+ { "inject-build-id", "Benchmark build-id injection", bench_inject_build_id },
{ NULL, NULL, NULL }
};
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index 39efa51d7fb3..a25411926e48 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -174,19 +174,19 @@ static int build_id_cache__add_kcore(const char *filename, bool force)
static int build_id_cache__add_file(const char *filename, struct nsinfo *nsi)
{
char sbuild_id[SBUILD_ID_SIZE];
- u8 build_id[BUILD_ID_SIZE];
+ struct build_id bid;
int err;
struct nscookie nsc;
nsinfo__mountns_enter(nsi, &nsc);
- err = filename__read_build_id(filename, &build_id, sizeof(build_id));
+ err = filename__read_build_id(filename, &bid);
nsinfo__mountns_exit(&nsc);
if (err < 0) {
pr_debug("Couldn't read a build-id in %s\n", filename);
return -1;
}
- build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+ build_id__sprintf(&bid, sbuild_id);
err = build_id_cache__add_s(sbuild_id, filename, nsi,
false, false);
pr_debug("Adding %s %s: %s\n", sbuild_id, filename,
@@ -196,21 +196,21 @@ static int build_id_cache__add_file(const char *filename, struct nsinfo *nsi)
static int build_id_cache__remove_file(const char *filename, struct nsinfo *nsi)
{
- u8 build_id[BUILD_ID_SIZE];
char sbuild_id[SBUILD_ID_SIZE];
+ struct build_id bid;
struct nscookie nsc;
int err;
nsinfo__mountns_enter(nsi, &nsc);
- err = filename__read_build_id(filename, &build_id, sizeof(build_id));
+ err = filename__read_build_id(filename, &bid);
nsinfo__mountns_exit(&nsc);
if (err < 0) {
pr_debug("Couldn't read a build-id in %s\n", filename);
return -1;
}
- build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+ build_id__sprintf(&bid, sbuild_id);
err = build_id_cache__remove_s(sbuild_id);
pr_debug("Removing %s %s: %s\n", sbuild_id, filename,
err ? "FAIL" : "Ok");
@@ -274,17 +274,16 @@ static int build_id_cache__purge_all(void)
static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused)
{
char filename[PATH_MAX];
- u8 build_id[BUILD_ID_SIZE];
+ struct build_id bid;
if (dso__build_id_filename(dso, filename, sizeof(filename), false) &&
- filename__read_build_id(filename, build_id,
- sizeof(build_id)) != sizeof(build_id)) {
+ filename__read_build_id(filename, &bid) == -1) {
if (errno == ENOENT)
return false;
pr_warning("Problems with %s file, consider removing it from the cache\n",
filename);
- } else if (memcmp(dso->build_id, build_id, sizeof(dso->build_id))) {
+ } else if (memcmp(dso->bid.data, bid.data, bid.size)) {
pr_warning("Problems with %s file, consider removing it from the cache\n",
filename);
}
@@ -300,14 +299,14 @@ static int build_id_cache__fprintf_missing(struct perf_session *session, FILE *f
static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi)
{
- u8 build_id[BUILD_ID_SIZE];
char sbuild_id[SBUILD_ID_SIZE];
+ struct build_id bid;
struct nscookie nsc;
int err;
nsinfo__mountns_enter(nsi, &nsc);
- err = filename__read_build_id(filename, &build_id, sizeof(build_id));
+ err = filename__read_build_id(filename, &bid);
nsinfo__mountns_exit(&nsc);
if (err < 0) {
pr_debug("Couldn't read a build-id in %s\n", filename);
@@ -315,7 +314,7 @@ static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi)
}
err = 0;
- build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+ build_id__sprintf(&bid, sbuild_id);
if (build_id_cache__cached(sbuild_id))
err = build_id_cache__remove_s(sbuild_id);
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 5938b100eaf4..d5bea5d3cd51 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -652,45 +652,6 @@ STAT_FN(ld_l2hit)
STAT_FN(ld_llchit)
STAT_FN(rmt_hit)
-static uint64_t llc_miss(struct c2c_stats *stats)
-{
- uint64_t llcmiss;
-
- llcmiss = stats->lcl_dram +
- stats->rmt_dram +
- stats->rmt_hitm +
- stats->rmt_hit;
-
- return llcmiss;
-}
-
-static int
-ld_llcmiss_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
- struct hist_entry *he)
-{
- struct c2c_hist_entry *c2c_he;
- int width = c2c_width(fmt, hpp, he->hists);
-
- c2c_he = container_of(he, struct c2c_hist_entry, he);
-
- return scnprintf(hpp->buf, hpp->size, "%*lu", width,
- llc_miss(&c2c_he->stats));
-}
-
-static int64_t
-ld_llcmiss_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
- struct hist_entry *left, struct hist_entry *right)
-{
- struct c2c_hist_entry *c2c_left;
- struct c2c_hist_entry *c2c_right;
-
- c2c_left = container_of(left, struct c2c_hist_entry, he);
- c2c_right = container_of(right, struct c2c_hist_entry, he);
-
- return (uint64_t) llc_miss(&c2c_left->stats) -
- (uint64_t) llc_miss(&c2c_right->stats);
-}
-
static uint64_t total_records(struct c2c_stats *stats)
{
uint64_t lclmiss, ldcnt, total;
@@ -1328,7 +1289,7 @@ static struct c2c_dimension dim_iaddr = {
};
static struct c2c_dimension dim_tot_hitm = {
- .header = HEADER_SPAN("----- LLC Load Hitm -----", "Total", 2),
+ .header = HEADER_SPAN("------- Load Hitm -------", "Total", 2),
.name = "tot_hitm",
.cmp = tot_hitm_cmp,
.entry = tot_hitm_entry,
@@ -1336,7 +1297,7 @@ static struct c2c_dimension dim_tot_hitm = {
};
static struct c2c_dimension dim_lcl_hitm = {
- .header = HEADER_SPAN_LOW("Lcl"),
+ .header = HEADER_SPAN_LOW("LclHitm"),
.name = "lcl_hitm",
.cmp = lcl_hitm_cmp,
.entry = lcl_hitm_entry,
@@ -1344,7 +1305,7 @@ static struct c2c_dimension dim_lcl_hitm = {
};
static struct c2c_dimension dim_rmt_hitm = {
- .header = HEADER_SPAN_LOW("Rmt"),
+ .header = HEADER_SPAN_LOW("RmtHitm"),
.name = "rmt_hitm",
.cmp = rmt_hitm_cmp,
.entry = rmt_hitm_entry,
@@ -1367,16 +1328,16 @@ static struct c2c_dimension dim_cl_lcl_hitm = {
.width = 7,
};
-static struct c2c_dimension dim_stores = {
- .header = HEADER_SPAN("---- Store Reference ----", "Total", 2),
- .name = "stores",
+static struct c2c_dimension dim_tot_stores = {
+ .header = HEADER_BOTH("Total", "Stores"),
+ .name = "tot_stores",
.cmp = store_cmp,
.entry = store_entry,
.width = 7,
};
static struct c2c_dimension dim_stores_l1hit = {
- .header = HEADER_SPAN_LOW("L1Hit"),
+ .header = HEADER_SPAN("---- Stores ----", "L1Hit", 1),
.name = "stores_l1hit",
.cmp = st_l1hit_cmp,
.entry = st_l1hit_entry,
@@ -1432,7 +1393,7 @@ static struct c2c_dimension dim_ld_l2hit = {
};
static struct c2c_dimension dim_ld_llchit = {
- .header = HEADER_SPAN("-- LLC Load Hit --", "Llc", 1),
+ .header = HEADER_SPAN("- LLC Load Hit --", "LclHit", 1),
.name = "ld_lclhit",
.cmp = ld_llchit_cmp,
.entry = ld_llchit_entry,
@@ -1440,21 +1401,13 @@ static struct c2c_dimension dim_ld_llchit = {
};
static struct c2c_dimension dim_ld_rmthit = {
- .header = HEADER_SPAN_LOW("Rmt"),
+ .header = HEADER_SPAN("- RMT Load Hit --", "RmtHit", 1),
.name = "ld_rmthit",
.cmp = rmt_hit_cmp,
.entry = rmt_hit_entry,
.width = 8,
};
-static struct c2c_dimension dim_ld_llcmiss = {
- .header = HEADER_BOTH("LLC", "Ld Miss"),
- .name = "ld_llcmiss",
- .cmp = ld_llcmiss_cmp,
- .entry = ld_llcmiss_entry,
- .width = 7,
-};
-
static struct c2c_dimension dim_tot_recs = {
.header = HEADER_BOTH("Total", "records"),
.name = "tot_recs",
@@ -1486,7 +1439,7 @@ static struct c2c_dimension dim_percent_hitm = {
};
static struct c2c_dimension dim_percent_rmt_hitm = {
- .header = HEADER_SPAN("----- HITM -----", "Rmt", 1),
+ .header = HEADER_SPAN("----- HITM -----", "RmtHitm", 1),
.name = "percent_rmt_hitm",
.cmp = percent_rmt_hitm_cmp,
.entry = percent_rmt_hitm_entry,
@@ -1495,7 +1448,7 @@ static struct c2c_dimension dim_percent_rmt_hitm = {
};
static struct c2c_dimension dim_percent_lcl_hitm = {
- .header = HEADER_SPAN_LOW("Lcl"),
+ .header = HEADER_SPAN_LOW("LclHitm"),
.name = "percent_lcl_hitm",
.cmp = percent_lcl_hitm_cmp,
.entry = percent_lcl_hitm_entry,
@@ -1648,7 +1601,7 @@ static struct c2c_dimension *dimensions[] = {
&dim_rmt_hitm,
&dim_cl_lcl_hitm,
&dim_cl_rmt_hitm,
- &dim_stores,
+ &dim_tot_stores,
&dim_stores_l1hit,
&dim_stores_l1miss,
&dim_cl_stores_l1hit,
@@ -1658,7 +1611,6 @@ static struct c2c_dimension *dimensions[] = {
&dim_ld_l2hit,
&dim_ld_llchit,
&dim_ld_rmthit,
- &dim_ld_llcmiss,
&dim_tot_recs,
&dim_tot_loads,
&dim_percent_hitm,
@@ -2846,15 +2798,16 @@ static int perf_c2c__report(int argc, const char **argv)
"dcacheline,"
"dcacheline_node,"
"dcacheline_count,"
- "tot_recs,"
"percent_hitm,"
"tot_hitm,lcl_hitm,rmt_hitm,"
- "stores,stores_l1hit,stores_l1miss,"
- "dram_lcl,dram_rmt,"
- "ld_llcmiss,"
+ "tot_recs,"
"tot_loads,"
+ "tot_stores,"
+ "stores_l1hit,stores_l1miss,"
"ld_fbhit,ld_l1hit,ld_l2hit,"
- "ld_lclhit,ld_rmthit",
+ "ld_lclhit,lcl_hitm,"
+ "ld_rmthit,rmt_hitm,"
+ "dram_lcl,dram_rmt",
c2c.display == DISPLAY_TOT ? "tot_hitm" :
c2c.display == DISPLAY_LCL ? "lcl_hitm" : "rmt_hitm"
);
@@ -2916,7 +2869,7 @@ static int perf_c2c__record(int argc, const char **argv)
bool event_set = false;
struct option options[] = {
OPT_CALLBACK('e', "event", &event_set, "event",
- "event selector. Use 'perf mem record -e list' to list available events",
+ "event selector. Use 'perf c2c record -e list' to list available events",
parse_record_events),
OPT_BOOLEAN('u', "all-user", &all_user, "collect only user level data"),
OPT_BOOLEAN('k', "all-kernel", &all_kernel, "collect only kernel level data"),
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index f8c9bdd8269a..584e2e1a3793 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -25,6 +25,7 @@
#include "util/map.h"
#include "util/spark.h"
#include "util/block-info.h"
+#include "util/stream.h"
#include <linux/err.h>
#include <linux/zalloc.h>
#include <subcmd/pager.h>
@@ -42,6 +43,7 @@ struct perf_diff {
int range_size;
int range_num;
bool has_br_stack;
+ bool stream;
};
/* Diff command specific HPP columns. */
@@ -72,6 +74,7 @@ struct data__file {
struct perf_data data;
int idx;
struct hists *hists;
+ struct evlist_streams *evlist_streams;
struct diff_hpp_fmt fmt[PERF_HPP_DIFF__MAX_INDEX];
};
@@ -106,6 +109,7 @@ enum {
COMPUTE_DELTA_ABS,
COMPUTE_CYCLES,
COMPUTE_MAX,
+ COMPUTE_STREAM, /* After COMPUTE_MAX to avoid use current compute arrays */
};
const char *compute_names[COMPUTE_MAX] = {
@@ -393,6 +397,11 @@ static int diff__process_sample_event(struct perf_tool *tool,
struct perf_diff *pdiff = container_of(tool, struct perf_diff, tool);
struct addr_location al;
struct hists *hists = evsel__hists(evsel);
+ struct hist_entry_iter iter = {
+ .evsel = evsel,
+ .sample = sample,
+ .ops = &hist_iter_normal,
+ };
int ret = -1;
if (perf_time__ranges_skip_sample(pdiff->ptime_range, pdiff->range_num,
@@ -411,14 +420,8 @@ static int diff__process_sample_event(struct perf_tool *tool,
goto out_put;
}
- if (compute != COMPUTE_CYCLES) {
- if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample,
- true)) {
- pr_warning("problem incrementing symbol period, "
- "skipping event\n");
- goto out_put;
- }
- } else {
+ switch (compute) {
+ case COMPUTE_CYCLES:
if (!hists__add_entry_ops(hists, &block_hist_ops, &al, NULL,
NULL, NULL, sample, true)) {
pr_warning("problem incrementing symbol period, "
@@ -428,6 +431,23 @@ static int diff__process_sample_event(struct perf_tool *tool,
hist__account_cycles(sample->branch_stack, &al, sample, false,
NULL);
+ break;
+
+ case COMPUTE_STREAM:
+ if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
+ NULL)) {
+ pr_debug("problem adding hist entry, skipping event\n");
+ goto out_put;
+ }
+ break;
+
+ default:
+ if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample,
+ true)) {
+ pr_warning("problem incrementing symbol period, "
+ "skipping event\n");
+ goto out_put;
+ }
}
/*
@@ -996,10 +1016,55 @@ static void data_process(void)
}
}
+static int process_base_stream(struct data__file *data_base,
+ struct data__file *data_pair,
+ const char *title __maybe_unused)
+{
+ struct evlist *evlist_base = data_base->session->evlist;
+ struct evlist *evlist_pair = data_pair->session->evlist;
+ struct evsel *evsel_base, *evsel_pair;
+ struct evsel_streams *es_base, *es_pair;
+
+ evlist__for_each_entry(evlist_base, evsel_base) {
+ evsel_pair = evsel_match(evsel_base, evlist_pair);
+ if (!evsel_pair)
+ continue;
+
+ es_base = evsel_streams__entry(data_base->evlist_streams,
+ evsel_base->idx);
+ if (!es_base)
+ return -1;
+
+ es_pair = evsel_streams__entry(data_pair->evlist_streams,
+ evsel_pair->idx);
+ if (!es_pair)
+ return -1;
+
+ evsel_streams__match(es_base, es_pair);
+ evsel_streams__report(es_base, es_pair);
+ }
+
+ return 0;
+}
+
+static void stream_process(void)
+{
+ /*
+ * Stream comparison only supports two data files.
+ * perf.data.old and perf.data. data__files[0] is perf.data.old,
+ * data__files[1] is perf.data.
+ */
+ process_base_stream(&data__files[0], &data__files[1],
+ "# Output based on old perf data:\n#\n");
+}
+
static void data__free(struct data__file *d)
{
int col;
+ if (d->evlist_streams)
+ evlist_streams__delete(d->evlist_streams);
+
for (col = 0; col < PERF_HPP_DIFF__MAX_INDEX; col++) {
struct diff_hpp_fmt *fmt = &d->fmt[col];
@@ -1153,9 +1218,19 @@ static int __cmd_diff(void)
if (pdiff.ptime_range)
zfree(&pdiff.ptime_range);
+
+ if (compute == COMPUTE_STREAM) {
+ d->evlist_streams = evlist__create_streams(
+ d->session->evlist, 5);
+ if (!d->evlist_streams)
+ goto out_delete;
+ }
}
- data_process();
+ if (compute == COMPUTE_STREAM)
+ stream_process();
+ else
+ data_process();
out_delete:
data__for_each_file(i, d) {
@@ -1228,6 +1303,8 @@ static const struct option options[] = {
"only consider symbols in these pids"),
OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
"only consider symbols in these tids"),
+ OPT_BOOLEAN(0, "stream", &pdiff.stream,
+ "Enable hot streams comparison."),
OPT_END()
};
@@ -1887,6 +1964,9 @@ int cmd_diff(int argc, const char **argv)
if (cycles_hist && (compute != COMPUTE_CYCLES))
usage_with_options(diff_usage, options);
+ if (pdiff.stream)
+ compute = COMPUTE_STREAM;
+
symbol__annotation_init();
if (symbol__init(NULL) < 0)
@@ -1898,13 +1978,26 @@ int cmd_diff(int argc, const char **argv)
if (check_file_brstack() < 0)
return -1;
- if (compute == COMPUTE_CYCLES && !pdiff.has_br_stack)
+ if ((compute == COMPUTE_CYCLES || compute == COMPUTE_STREAM)
+ && !pdiff.has_br_stack) {
return -1;
+ }
- if (ui_init() < 0)
- return -1;
+ if (compute == COMPUTE_STREAM) {
+ symbol_conf.show_branchflag_count = true;
+ symbol_conf.disable_add2line_warn = true;
+ callchain_param.mode = CHAIN_FLAT;
+ callchain_param.key = CCKEY_SRCLINE;
+ callchain_param.branch_callstack = 1;
+ symbol_conf.use_callchain = true;
+ callchain_register_param(&callchain_param);
+ sort_order = "srcline,symbol,dso";
+ } else {
+ if (ui_init() < 0)
+ return -1;
- sort__mode = SORT_MODE__DIFF;
+ sort__mode = SORT_MODE__DIFF;
+ }
if (setup_sorting(NULL) < 0)
usage_with_options(diff_usage, options);
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 1d44bc2f63d8..9366fad591dc 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -25,6 +25,7 @@
#include "target.h"
#include "cpumap.h"
#include "thread_map.h"
+#include "strfilter.h"
#include "util/cap.h"
#include "util/config.h"
#include "util/units.h"
@@ -36,7 +37,6 @@ struct perf_ftrace {
struct evlist *evlist;
struct target target;
const char *tracer;
- bool list_avail_functions;
struct list_head filters;
struct list_head notrace;
struct list_head graph_funcs;
@@ -181,6 +181,40 @@ out:
return ret;
}
+static int read_tracing_file_by_line(const char *name,
+ void (*cb)(char *str, void *arg),
+ void *cb_arg)
+{
+ char *line = NULL;
+ size_t len = 0;
+ char *file;
+ FILE *fp;
+
+ file = get_tracing_file(name);
+ if (!file) {
+ pr_debug("cannot get tracing file: %s\n", name);
+ return -1;
+ }
+
+ fp = fopen(file, "r");
+ if (fp == NULL) {
+ pr_debug("cannot open tracing file: %s\n", name);
+ put_tracing_file(file);
+ return -1;
+ }
+
+ while (getline(&line, &len, fp) != -1) {
+ cb(line, cb_arg);
+ }
+
+ if (line)
+ free(line);
+
+ fclose(fp);
+ put_tracing_file(file);
+ return 0;
+}
+
static int write_tracing_file_int(const char *name, int value)
{
char buf[16];
@@ -557,9 +591,6 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
signal(SIGCHLD, sig_handler);
signal(SIGPIPE, sig_handler);
- if (ftrace->list_avail_functions)
- return read_tracing_file_to_stdout("available_filter_functions");
-
if (reset_tracing_files(ftrace) < 0) {
pr_err("failed to reset ftrace\n");
goto out;
@@ -683,6 +714,46 @@ static int perf_ftrace_config(const char *var, const char *value, void *cb)
return -1;
}
+static void list_function_cb(char *str, void *arg)
+{
+ struct strfilter *filter = (struct strfilter *)arg;
+
+ if (strfilter__compare(filter, str))
+ printf("%s", str);
+}
+
+static int opt_list_avail_functions(const struct option *opt __maybe_unused,
+ const char *str, int unset)
+{
+ struct strfilter *filter;
+ const char *err = NULL;
+ int ret;
+
+ if (unset || !str)
+ return -1;
+
+ filter = strfilter__new(str, &err);
+ if (!filter)
+ return err ? -EINVAL : -ENOMEM;
+
+ ret = strfilter__or(filter, str, &err);
+ if (ret == -EINVAL) {
+ pr_err("Filter parse error at %td.\n", err - str + 1);
+ pr_err("Source: \"%s\"\n", str);
+ pr_err(" %*c\n", (int)(err - str + 1), '^');
+ strfilter__delete(filter);
+ return ret;
+ }
+
+ ret = read_tracing_file_by_line("available_filter_functions",
+ list_function_cb, filter);
+ strfilter__delete(filter);
+ if (ret < 0)
+ return ret;
+
+ exit(0);
+}
+
static int parse_filter_func(const struct option *opt, const char *str,
int unset __maybe_unused)
{
@@ -817,8 +888,9 @@ int cmd_ftrace(int argc, const char **argv)
const struct option ftrace_options[] = {
OPT_STRING('t', "tracer", &ftrace.tracer, "tracer",
"Tracer to use: function_graph(default) or function"),
- OPT_BOOLEAN('F', "funcs", &ftrace.list_avail_functions,
- "Show available functions to filter"),
+ OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]",
+ "Show available functions to filter",
+ opt_list_avail_functions, "*"),
OPT_STRING('p', "pid", &ftrace.target.pid, "pid",
"Trace on existing process id"),
/* TODO: Add short option -t after -t/--tracer can be removed. */
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 6d2f410d773a..452a75fe68e5 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -10,6 +10,7 @@
#include "util/color.h"
#include "util/dso.h"
+#include "util/vdso.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/map.h"
@@ -23,9 +24,11 @@
#include "util/symbol.h"
#include "util/synthetic-events.h"
#include "util/thread.h"
-#include <linux/err.h>
+#include "util/namespaces.h"
+#include <linux/err.h>
#include <subcmd/parse-options.h>
+#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
#include <linux/list.h>
#include <errno.h>
@@ -35,6 +38,7 @@ struct perf_inject {
struct perf_tool tool;
struct perf_session *session;
bool build_ids;
+ bool build_id_all;
bool sched_stat;
bool have_auxtrace;
bool strip;
@@ -54,6 +58,9 @@ struct event_entry {
union perf_event event[];
};
+static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool,
+ struct machine *machine, u8 cpumode, u32 flags);
+
static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
{
ssize_t size;
@@ -97,6 +104,13 @@ static int perf_event__repipe_op2_synth(struct perf_session *session,
return perf_event__repipe_synth(session->tool, event);
}
+static int perf_event__repipe_op4_synth(struct perf_session *session,
+ union perf_event *event,
+ u64 data __maybe_unused)
+{
+ return perf_event__repipe_synth(session->tool, event);
+}
+
static int perf_event__repipe_attr(struct perf_tool *tool,
union perf_event *event,
struct evlist **pevlist)
@@ -115,6 +129,13 @@ static int perf_event__repipe_attr(struct perf_tool *tool,
return perf_event__repipe_synth(tool, event);
}
+static int perf_event__repipe_event_update(struct perf_tool *tool,
+ union perf_event *event,
+ struct evlist **pevlist __maybe_unused)
+{
+ return perf_event__repipe_synth(tool, event);
+}
+
#ifdef HAVE_AUXTRACE_SUPPORT
static int copy_bytes(struct perf_inject *inject, int fd, off_t size)
@@ -303,6 +324,68 @@ static int perf_event__jit_repipe_mmap(struct perf_tool *tool,
}
#endif
+static struct dso *findnew_dso(int pid, int tid, const char *filename,
+ struct dso_id *id, struct machine *machine)
+{
+ struct thread *thread;
+ struct nsinfo *nsi = NULL;
+ struct nsinfo *nnsi;
+ struct dso *dso;
+ bool vdso;
+
+ thread = machine__findnew_thread(machine, pid, tid);
+ if (thread == NULL) {
+ pr_err("cannot find or create a task %d/%d.\n", tid, pid);
+ return NULL;
+ }
+
+ vdso = is_vdso_map(filename);
+ nsi = nsinfo__get(thread->nsinfo);
+
+ if (vdso) {
+ /* The vdso maps are always on the host and not the
+ * container. Ensure that we don't use setns to look
+ * them up.
+ */
+ nnsi = nsinfo__copy(nsi);
+ if (nnsi) {
+ nsinfo__put(nsi);
+ nnsi->need_setns = false;
+ nsi = nnsi;
+ }
+ dso = machine__findnew_vdso(machine, thread);
+ } else {
+ dso = machine__findnew_dso_id(machine, filename, id);
+ }
+
+ if (dso)
+ dso->nsinfo = nsi;
+ else
+ nsinfo__put(nsi);
+
+ thread__put(thread);
+ return dso;
+}
+
+static int perf_event__repipe_buildid_mmap(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct dso *dso;
+
+ dso = findnew_dso(event->mmap.pid, event->mmap.tid,
+ event->mmap.filename, NULL, machine);
+
+ if (dso && !dso->hit) {
+ dso->hit = 1;
+ dso__inject_build_id(dso, tool, machine, sample->cpumode, 0);
+ dso__put(dso);
+ }
+
+ return perf_event__repipe(tool, event, sample, machine);
+}
+
static int perf_event__repipe_mmap2(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
@@ -341,6 +424,34 @@ static int perf_event__jit_repipe_mmap2(struct perf_tool *tool,
}
#endif
+static int perf_event__repipe_buildid_mmap2(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct dso_id dso_id = {
+ .maj = event->mmap2.maj,
+ .min = event->mmap2.min,
+ .ino = event->mmap2.ino,
+ .ino_generation = event->mmap2.ino_generation,
+ };
+ struct dso *dso;
+
+ dso = findnew_dso(event->mmap2.pid, event->mmap2.tid,
+ event->mmap2.filename, &dso_id, machine);
+
+ if (dso && !dso->hit) {
+ dso->hit = 1;
+ dso__inject_build_id(dso, tool, machine, sample->cpumode,
+ event->mmap2.flags);
+ dso__put(dso);
+ }
+
+ perf_event__repipe(tool, event, sample, machine);
+
+ return 0;
+}
+
static int perf_event__repipe_fork(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
@@ -405,34 +516,36 @@ static int perf_event__repipe_tracing_data(struct perf_session *session,
static int dso__read_build_id(struct dso *dso)
{
+ struct nscookie nsc;
+
if (dso->has_build_id)
return 0;
- if (filename__read_build_id(dso->long_name, dso->build_id,
- sizeof(dso->build_id)) > 0) {
+ nsinfo__mountns_enter(dso->nsinfo, &nsc);
+ if (filename__read_build_id(dso->long_name, &dso->bid) > 0)
dso->has_build_id = true;
- return 0;
- }
+ nsinfo__mountns_exit(&nsc);
- return -1;
+ return dso->has_build_id ? 0 : -1;
}
static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool,
- struct machine *machine)
+ struct machine *machine, u8 cpumode, u32 flags)
{
- u16 misc = PERF_RECORD_MISC_USER;
int err;
+ if (is_anon_memory(dso->long_name) || flags & MAP_HUGETLB)
+ return 0;
+ if (is_no_dso_memory(dso->long_name))
+ return 0;
+
if (dso__read_build_id(dso) < 0) {
pr_debug("no build_id found for %s\n", dso->long_name);
return -1;
}
- if (dso->kernel)
- misc = PERF_RECORD_MISC_KERNEL;
-
- err = perf_event__synthesize_build_id(tool, dso, misc, perf_event__repipe,
- machine);
+ err = perf_event__synthesize_build_id(tool, dso, cpumode,
+ perf_event__repipe, machine);
if (err) {
pr_err("Can't synthesize build_id event for %s\n", dso->long_name);
return -1;
@@ -441,11 +554,10 @@ static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool,
return 0;
}
-static int perf_event__inject_buildid(struct perf_tool *tool,
- union perf_event *event,
- struct perf_sample *sample,
- struct evsel *evsel __maybe_unused,
- struct machine *machine)
+int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event,
+ struct perf_sample *sample,
+ struct evsel *evsel __maybe_unused,
+ struct machine *machine)
{
struct addr_location al;
struct thread *thread;
@@ -460,19 +572,8 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) {
if (!al.map->dso->hit) {
al.map->dso->hit = 1;
- if (map__load(al.map) >= 0) {
- dso__inject_build_id(al.map->dso, tool, machine);
- /*
- * If this fails, too bad, let the other side
- * account this as unresolved.
- */
- } else {
-#ifdef HAVE_LIBELF_SUPPORT
- pr_warning("no symbols found in %s, maybe "
- "install a debug package?\n",
- al.map->dso->long_name);
-#endif
- }
+ dso__inject_build_id(al.map->dso, tool, machine,
+ sample->cpumode, al.map->flags);
}
}
@@ -606,7 +707,7 @@ static int __cmd_inject(struct perf_inject *inject)
signal(SIGINT, sig_handler);
if (inject->build_ids || inject->sched_stat ||
- inject->itrace_synth_opts.set) {
+ inject->itrace_synth_opts.set || inject->build_id_all) {
inject->tool.mmap = perf_event__repipe_mmap;
inject->tool.mmap2 = perf_event__repipe_mmap2;
inject->tool.fork = perf_event__repipe_fork;
@@ -615,7 +716,10 @@ static int __cmd_inject(struct perf_inject *inject)
output_data_offset = session->header.data_offset;
- if (inject->build_ids) {
+ if (inject->build_id_all) {
+ inject->tool.mmap = perf_event__repipe_buildid_mmap;
+ inject->tool.mmap2 = perf_event__repipe_buildid_mmap2;
+ } else if (inject->build_ids) {
inject->tool.sample = perf_event__inject_buildid;
} else if (inject->sched_stat) {
struct evsel *evsel;
@@ -708,9 +812,12 @@ int cmd_inject(int argc, const char **argv)
struct perf_inject inject = {
.tool = {
.sample = perf_event__repipe_sample,
+ .read = perf_event__repipe_sample,
.mmap = perf_event__repipe,
.mmap2 = perf_event__repipe,
.comm = perf_event__repipe,
+ .namespaces = perf_event__repipe,
+ .cgroup = perf_event__repipe,
.fork = perf_event__repipe,
.exit = perf_event__repipe,
.lost = perf_event__repipe,
@@ -718,19 +825,28 @@ int cmd_inject(int argc, const char **argv)
.aux = perf_event__repipe,
.itrace_start = perf_event__repipe,
.context_switch = perf_event__repipe,
- .read = perf_event__repipe_sample,
.throttle = perf_event__repipe,
.unthrottle = perf_event__repipe,
+ .ksymbol = perf_event__repipe,
+ .bpf = perf_event__repipe,
+ .text_poke = perf_event__repipe,
.attr = perf_event__repipe_attr,
+ .event_update = perf_event__repipe_event_update,
.tracing_data = perf_event__repipe_op2_synth,
- .auxtrace_info = perf_event__repipe_op2_synth,
- .auxtrace = perf_event__repipe_auxtrace,
- .auxtrace_error = perf_event__repipe_op2_synth,
- .time_conv = perf_event__repipe_op2_synth,
.finished_round = perf_event__repipe_oe_synth,
.build_id = perf_event__repipe_op2_synth,
.id_index = perf_event__repipe_op2_synth,
+ .auxtrace_info = perf_event__repipe_op2_synth,
+ .auxtrace_error = perf_event__repipe_op2_synth,
+ .time_conv = perf_event__repipe_op2_synth,
+ .thread_map = perf_event__repipe_op2_synth,
+ .cpu_map = perf_event__repipe_op2_synth,
+ .stat_config = perf_event__repipe_op2_synth,
+ .stat = perf_event__repipe_op2_synth,
+ .stat_round = perf_event__repipe_op2_synth,
.feature = perf_event__repipe_op2_synth,
+ .compressed = perf_event__repipe_op4_synth,
+ .auxtrace = perf_event__repipe_auxtrace,
},
.input_name = "-",
.samples = LIST_HEAD_INIT(inject.samples),
@@ -747,6 +863,8 @@ int cmd_inject(int argc, const char **argv)
struct option options[] = {
OPT_BOOLEAN('b', "build-ids", &inject.build_ids,
"Inject build-ids into the output stream"),
+ OPT_BOOLEAN(0, "buildid-all", &inject.build_id_all,
+ "Inject build-ids of all DSOs into the output stream"),
OPT_STRING('i', "input", &inject.input_name, "file",
"input file name"),
OPT_STRING('o', "output", &inject.output.path, "file",
@@ -795,8 +913,6 @@ int cmd_inject(int argc, const char **argv)
return -1;
}
- inject.tool.ordered_events = inject.sched_stat;
-
data.path = inject.input_name;
inject.session = perf_session__new(&data, true, &inject.tool);
if (IS_ERR(inject.session))
@@ -805,7 +921,7 @@ int cmd_inject(int argc, const char **argv)
if (zstd_init(&(inject.session->zstd_data), 0) < 0)
pr_warning("Decompression initialization failed.\n");
- if (inject.build_ids) {
+ if (inject.build_ids && !inject.build_id_all) {
/*
* to make sure the mmap records are ordered correctly
* and so that the correct especially due to jitted code
@@ -815,6 +931,11 @@ int cmd_inject(int argc, const char **argv)
inject.tool.ordered_events = true;
inject.tool.ordering_requires_timestamps = true;
}
+
+ if (inject.sched_stat) {
+ inject.tool.ordered_events = true;
+ }
+
#ifdef HAVE_JITDUMP
if (inject.jit_mode) {
inject.tool.mmap2 = perf_event__jit_repipe_mmap2;
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index 0a7fe4cb5555..10ab5e40a34f 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -92,13 +92,6 @@ int cmd_list(int argc, const char **argv)
else if ((sep = strchr(argv[i], ':')) != NULL) {
int sep_idx;
- if (sep == NULL) {
- print_events(argv[i], raw_dump, !desc_flag,
- long_desc_flag,
- details_flag,
- deprecated);
- continue;
- }
sep_idx = sep - argv[i];
s = strdup(argv[i]);
if (s == NULL)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 772f1057647f..adf311d15d3d 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1593,6 +1593,16 @@ static int record__init_clock(struct record *rec)
return 0;
}
+static void hit_auxtrace_snapshot_trigger(struct record *rec)
+{
+ if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
+ trigger_hit(&auxtrace_snapshot_trigger);
+ auxtrace_record__snapshot_started = 1;
+ if (auxtrace_record__snapshot_start(rec->itr))
+ trigger_error(&auxtrace_snapshot_trigger);
+ }
+}
+
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
int err;
@@ -1937,6 +1947,10 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
case EVLIST_CTL_CMD_DISABLE:
pr_info(EVLIST_DISABLED_MSG);
break;
+ case EVLIST_CTL_CMD_SNAPSHOT:
+ hit_auxtrace_snapshot_trigger(rec);
+ evlist__ctlfd_ack(rec->evlist);
+ break;
case EVLIST_CTL_CMD_ACK:
case EVLIST_CTL_CMD_UNSUPPORTED:
default:
@@ -2234,27 +2248,9 @@ static int parse_control_option(const struct option *opt,
const char *str,
int unset __maybe_unused)
{
- char *comma = NULL, *endptr = NULL;
- struct record_opts *config = (struct record_opts *)opt->value;
-
- if (strncmp(str, "fd:", 3))
- return -EINVAL;
-
- config->ctl_fd = strtoul(&str[3], &endptr, 0);
- if (endptr == &str[3])
- return -EINVAL;
-
- comma = strchr(str, ',');
- if (comma) {
- if (endptr != comma)
- return -EINVAL;
-
- config->ctl_fd_ack = strtoul(comma + 1, &endptr, 0);
- if (endptr == comma + 1 || *endptr != '\0')
- return -EINVAL;
- }
+ struct record_opts *opts = opt->value;
- return 0;
+ return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
}
static void switch_output_size_warn(struct record *rec)
@@ -2596,9 +2592,11 @@ static struct option __record_options[] = {
"libpfm4 event selector. use 'perf list' to list available events",
parse_libpfm_events_option),
#endif
- OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd]",
- "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n"
- "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.",
+ OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
+ "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
+ "\t\t\t 'snapshot': AUX area tracing snapshot).\n"
+ "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
+ "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
parse_control_option),
OPT_END()
};
@@ -2671,12 +2669,14 @@ int cmd_record(int argc, const char **argv)
!perf_can_record_switch_events()) {
ui__error("kernel does not support recording context switch events\n");
parse_options_usage(record_usage, record_options, "switch-events", 0);
- return -EINVAL;
+ err = -EINVAL;
+ goto out_opts;
}
if (switch_output_setup(rec)) {
parse_options_usage(record_usage, record_options, "switch-output", 0);
- return -EINVAL;
+ err = -EINVAL;
+ goto out_opts;
}
if (rec->switch_output.time) {
@@ -2687,8 +2687,10 @@ int cmd_record(int argc, const char **argv)
if (rec->switch_output.num_files) {
rec->switch_output.filenames = calloc(sizeof(char *),
rec->switch_output.num_files);
- if (!rec->switch_output.filenames)
- return -EINVAL;
+ if (!rec->switch_output.filenames) {
+ err = -EINVAL;
+ goto out_opts;
+ }
}
/*
@@ -2704,7 +2706,8 @@ int cmd_record(int argc, const char **argv)
rec->affinity_mask.bits = bitmap_alloc(rec->affinity_mask.nbits);
if (!rec->affinity_mask.bits) {
pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto out_opts;
}
pr_debug2("thread mask[%zd]: empty\n", rec->affinity_mask.nbits);
}
@@ -2835,6 +2838,8 @@ out:
evlist__delete(rec->evlist);
symbol__exit();
auxtrace_record__free(rec->itr);
+out_opts:
+ evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
return err;
}
@@ -2842,12 +2847,7 @@ static void snapshot_sig_handler(int sig __maybe_unused)
{
struct record *rec = &record;
- if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
- trigger_hit(&auxtrace_snapshot_trigger);
- auxtrace_record__snapshot_started = 1;
- if (auxtrace_record__snapshot_start(record.itr))
- trigger_error(&auxtrace_snapshot_trigger);
- }
+ hit_auxtrace_snapshot_trigger(rec);
if (switch_output_signal(rec))
trigger_hit(&switch_output_trigger);
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index e6fc297cee91..0e16f9d5a947 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -130,7 +130,8 @@ struct work_atoms {
struct thread *thread;
struct rb_node node;
u64 max_lat;
- u64 max_lat_at;
+ u64 max_lat_start;
+ u64 max_lat_end;
u64 total_lat;
u64 nb_atoms;
u64 total_runtime;
@@ -1096,7 +1097,8 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
atoms->total_lat += delta;
if (delta > atoms->max_lat) {
atoms->max_lat = delta;
- atoms->max_lat_at = timestamp;
+ atoms->max_lat_start = atom->wake_up_time;
+ atoms->max_lat_end = timestamp;
}
atoms->nb_atoms++;
}
@@ -1322,7 +1324,7 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
int i;
int ret;
u64 avg;
- char max_lat_at[32];
+ char max_lat_start[32], max_lat_end[32];
if (!work_list->nb_atoms)
return;
@@ -1344,13 +1346,14 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
printf(" ");
avg = work_list->total_lat / work_list->nb_atoms;
- timestamp__scnprintf_usec(work_list->max_lat_at, max_lat_at, sizeof(max_lat_at));
+ timestamp__scnprintf_usec(work_list->max_lat_start, max_lat_start, sizeof(max_lat_start));
+ timestamp__scnprintf_usec(work_list->max_lat_end, max_lat_end, sizeof(max_lat_end));
- printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13s s\n",
+ printf("|%11.3f ms |%9" PRIu64 " | avg:%8.3f ms | max:%8.3f ms | max start: %12s s | max end: %12s s\n",
(double)work_list->total_runtime / NSEC_PER_MSEC,
work_list->nb_atoms, (double)avg / NSEC_PER_MSEC,
(double)work_list->max_lat / NSEC_PER_MSEC,
- max_lat_at);
+ max_lat_start, max_lat_end);
}
static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
@@ -3137,7 +3140,8 @@ static void __merge_work_atoms(struct rb_root_cached *root, struct work_atoms *d
list_splice(&data->work_list, &this->work_list);
if (this->max_lat < data->max_lat) {
this->max_lat = data->max_lat;
- this->max_lat_at = data->max_lat_at;
+ this->max_lat_start = data->max_lat_start;
+ this->max_lat_end = data->max_lat_end;
}
zfree(&data);
return;
@@ -3176,9 +3180,9 @@ static int perf_sched__lat(struct perf_sched *sched)
perf_sched__merge_lat(sched);
perf_sched__sort_lat(sched);
- printf("\n -----------------------------------------------------------------------------------------------------------------\n");
- printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n");
- printf(" -----------------------------------------------------------------------------------------------------------------\n");
+ printf("\n -------------------------------------------------------------------------------------------------------------------------------------------\n");
+ printf(" Task | Runtime ms | Switches | Avg delay ms | Max delay ms | Max delay start | Max delay end |\n");
+ printf(" -------------------------------------------------------------------------------------------------------------------------------------------\n");
next = rb_first_cached(&sched->sorted_atom_root);
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 484ce6067d23..48588ccf902e 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -702,12 +702,14 @@ static int perf_sample__fprintf_start(struct perf_script *script,
char tstr[128];
if (PRINT_FIELD(COMM)) {
+ const char *comm = thread ? thread__comm_str(thread) : ":-1";
+
if (latency_format)
- printed += fprintf(fp, "%8.8s ", thread__comm_str(thread));
+ printed += fprintf(fp, "%8.8s ", comm);
else if (PRINT_FIELD(IP) && evsel__has_callchain(evsel) && symbol_conf.use_callchain)
- printed += fprintf(fp, "%s ", thread__comm_str(thread));
+ printed += fprintf(fp, "%s ", comm);
else
- printed += fprintf(fp, "%16s ", thread__comm_str(thread));
+ printed += fprintf(fp, "%16s ", comm);
}
if (PRINT_FIELD(PID) && PRINT_FIELD(TID))
@@ -2238,7 +2240,7 @@ static int print_event_with_time(struct perf_tool *tool,
if (tid != -1)
thread = machine__findnew_thread(machine, pid, tid);
- if (thread && evsel) {
+ if (evsel) {
perf_sample__fprintf_start(script, sample, thread, evsel,
event->header.type, stdout);
}
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index fddc97cac984..b01af171d94f 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -56,7 +56,7 @@
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/counts.h"
-#include "util/group.h"
+#include "util/topdown.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/string2.h"
@@ -128,6 +128,15 @@ static const char * topdown_attrs[] = {
NULL,
};
+static const char *topdown_metric_attrs[] = {
+ "slots",
+ "topdown-retiring",
+ "topdown-bad-spec",
+ "topdown-fe-bound",
+ "topdown-be-bound",
+ NULL,
+};
+
static const char *smi_cost_attrs = {
"{"
"msr/aperf/,"
@@ -578,6 +587,7 @@ static void process_evlist(struct evlist *evlist, unsigned int interval)
process_interval();
pr_info(EVLIST_DISABLED_MSG);
break;
+ case EVLIST_CTL_CMD_SNAPSHOT:
case EVLIST_CTL_CMD_ACK:
case EVLIST_CTL_CMD_UNSUPPORTED:
default:
@@ -1045,27 +1055,20 @@ static int parse_control_option(const struct option *opt,
const char *str,
int unset __maybe_unused)
{
- char *comma = NULL, *endptr = NULL;
- struct perf_stat_config *config = (struct perf_stat_config *)opt->value;
-
- if (strncmp(str, "fd:", 3))
- return -EINVAL;
-
- config->ctl_fd = strtoul(&str[3], &endptr, 0);
- if (endptr == &str[3])
- return -EINVAL;
+ struct perf_stat_config *config = opt->value;
- comma = strchr(str, ',');
- if (comma) {
- if (endptr != comma)
- return -EINVAL;
+ return evlist__parse_control(str, &config->ctl_fd, &config->ctl_fd_ack, &config->ctl_fd_close);
+}
- config->ctl_fd_ack = strtoul(comma + 1, &endptr, 0);
- if (endptr == comma + 1 || *endptr != '\0')
- return -EINVAL;
+static int parse_stat_cgroups(const struct option *opt,
+ const char *str, int unset)
+{
+ if (stat_config.cgroup_list) {
+ pr_err("--cgroup and --for-each-cgroup cannot be used together\n");
+ return -1;
}
- return 0;
+ return parse_cgroups(opt, str, unset);
}
static struct option stat_options[] = {
@@ -1111,7 +1114,9 @@ static struct option stat_options[] = {
OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
"print counts with custom separator"),
OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
- "monitor event in cgroup name only", parse_cgroups),
+ "monitor event in cgroup name only", parse_stat_cgroups),
+ OPT_STRING(0, "for-each-cgroup", &stat_config.cgroup_list, "name",
+ "expand events for each cgroup"),
OPT_STRING('o', "output", &output_name, "file", "output file name"),
OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
OPT_INTEGER(0, "log-fd", &output_fd,
@@ -1171,9 +1176,10 @@ static struct option stat_options[] = {
"libpfm4 event selector. use 'perf list' to list available events",
parse_libpfm_events_option),
#endif
- OPT_CALLBACK(0, "control", &stat_config, "fd:ctl-fd[,ack-fd]",
+ OPT_CALLBACK(0, "control", &stat_config, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
"Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n"
- "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.",
+ "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
+ "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
parse_control_option),
OPT_END()
};
@@ -1497,55 +1503,6 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
return 0;
}
-static int topdown_filter_events(const char **attr, char **str, bool use_group)
-{
- int off = 0;
- int i;
- int len = 0;
- char *s;
-
- for (i = 0; attr[i]; i++) {
- if (pmu_have_event("cpu", attr[i])) {
- len += strlen(attr[i]) + 1;
- attr[i - off] = attr[i];
- } else
- off++;
- }
- attr[i - off] = NULL;
-
- *str = malloc(len + 1 + 2);
- if (!*str)
- return -1;
- s = *str;
- if (i - off == 0) {
- *s = 0;
- return 0;
- }
- if (use_group)
- *s++ = '{';
- for (i = 0; attr[i]; i++) {
- strcpy(s, attr[i]);
- s += strlen(s);
- *s++ = ',';
- }
- if (use_group) {
- s[-1] = '}';
- *s = 0;
- } else
- s[-1] = 0;
- return 0;
-}
-
-__weak bool arch_topdown_check_group(bool *warn)
-{
- *warn = false;
- return false;
-}
-
-__weak void arch_topdown_group_warn(void)
-{
-}
-
/*
* Add default attributes, if there were no attributes specified or
* if -d/--detailed, -d -d or -d -d -d is used:
@@ -1742,6 +1699,24 @@ static int add_default_attributes(void)
char *str = NULL;
bool warn = false;
+ if (!force_metric_only)
+ stat_config.metric_only = true;
+
+ if (topdown_filter_events(topdown_metric_attrs, &str, 1) < 0) {
+ pr_err("Out of memory\n");
+ return -1;
+ }
+ if (topdown_metric_attrs[0] && str) {
+ if (!stat_config.interval && !stat_config.metric_only) {
+ fprintf(stat_config.output,
+ "Topdown accuracy may decrease when measuring long periods.\n"
+ "Please print the result regularly, e.g. -I1000\n");
+ }
+ goto setup_metrics;
+ }
+
+ zfree(&str);
+
if (stat_config.aggr_mode != AGGR_GLOBAL &&
stat_config.aggr_mode != AGGR_CORE) {
pr_err("top down event configuration requires --per-core mode\n");
@@ -1753,8 +1728,6 @@ static int add_default_attributes(void)
return -1;
}
- if (!force_metric_only)
- stat_config.metric_only = true;
if (topdown_filter_events(topdown_attrs, &str,
arch_topdown_check_group(&warn)) < 0) {
pr_err("Out of memory\n");
@@ -1763,6 +1736,7 @@ static int add_default_attributes(void)
if (topdown_attrs[0] && str) {
if (warn)
arch_topdown_group_warn();
+setup_metrics:
err = parse_events(evsel_list, str, &errinfo);
if (err) {
fprintf(stderr,
@@ -2063,8 +2037,10 @@ static void setup_system_wide(int forks)
struct evsel *counter;
evlist__for_each_entry(evsel_list, counter) {
- if (!counter->core.system_wide)
+ if (!counter->core.system_wide &&
+ strcmp(counter->name, "duration_time")) {
return;
+ }
}
if (evsel_list->core.nr_entries)
@@ -2250,6 +2226,19 @@ int cmd_stat(int argc, const char **argv)
if (add_default_attributes())
goto out;
+ if (stat_config.cgroup_list) {
+ if (nr_cgroups > 0) {
+ pr_err("--cgroup and --for-each-cgroup cannot be used together\n");
+ parse_options_usage(stat_usage, stat_options, "G", 1);
+ parse_options_usage(NULL, stat_options, "for-each-cgroup", 0);
+ goto out;
+ }
+
+ if (evlist__expand_cgroup(evsel_list, stat_config.cgroup_list,
+ &stat_config.metric_events, true) < 0)
+ goto out;
+ }
+
target__validate(&target);
if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide))
@@ -2416,6 +2405,7 @@ out:
metricgroup__rblist_exit(&stat_config.metric_events);
runtime_stat_delete(&stat_config);
+ evlist__close_control(stat_config.ctl_fd, stat_config.ctl_fd_ack, &stat_config.ctl_fd_close);
return status;
}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index bea461b6f937..44a75f234db1 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1762,7 +1762,11 @@ static int trace__read_syscall_info(struct trace *trace, int id)
if (table == NULL)
return -ENOMEM;
- memset(table + trace->sctbl->syscalls.max_id, 0, (id - trace->sctbl->syscalls.max_id) * sizeof(*sc));
+ // Need to memset from offset 0 and +1 members if brand new
+ if (trace->syscalls.table == NULL)
+ memset(table, 0, (id + 1) * sizeof(*sc));
+ else
+ memset(table + trace->sctbl->syscalls.max_id + 1, 0, (id - trace->sctbl->syscalls.max_id) * sizeof(*sc));
trace->syscalls.table = table;
trace->sctbl->syscalls.max_id = id;
diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c
index 05cf2af9e2c2..d09ec2f03071 100644
--- a/tools/perf/builtin-version.c
+++ b/tools/perf/builtin-version.c
@@ -60,7 +60,6 @@ static void library_status(void)
STATUS(HAVE_DWARF_SUPPORT, dwarf);
STATUS(HAVE_DWARF_GETLOCATIONS_SUPPORT, dwarf_getlocations);
STATUS(HAVE_GLIBC_SUPPORT, glibc);
- STATUS(HAVE_GTK2_SUPPORT, gtk2);
#ifndef HAVE_SYSCALL_TABLE_SUPPORT
STATUS(HAVE_LIBAUDIT_SUPPORT, libaudit);
#endif
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 0b4d6431b072..15ecb1803fb9 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -75,6 +75,15 @@ include/uapi/asm-generic/mman-common.h
include/uapi/asm-generic/unistd.h
'
+# These copies are under tools/perf/trace/beauty/ as they are not used to in
+# building object files only by scripts in tools/perf/trace/beauty/ to generate
+# tables that then gets included in .c files for things like id->string syscall
+# tables (and the reverse lookup as well: string -> id)
+
+BEAUTY_FILES='
+include/linux/socket.h
+'
+
check_2 () {
file1=$1
file2=$2
@@ -100,6 +109,14 @@ check () {
check_2 tools/$file $file $*
}
+beauty_check () {
+ file=$1
+
+ shift
+
+ check_2 tools/perf/trace/beauty/$file $file $*
+}
+
# Check if we have the kernel headers (tools/perf/../../include), else
# we're probably on a detached tarball, so no point in trying to check
# differences.
@@ -128,8 +145,9 @@ check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in
# diff non-symmetric files
check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
-# These will require a beauty_check when we get some more like that
-check_2 tools/perf/trace/beauty/include/linux/socket.h include/linux/socket.h
+for i in $BEAUTY_FILES; do
+ beauty_check $i -B
+done
# check duplicated library files
check_2 tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h
diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h
index 15e458e150bd..7a2264e1e4e1 100644
--- a/tools/perf/perf-sys.h
+++ b/tools/perf/perf-sys.h
@@ -9,31 +9,13 @@
struct perf_event_attr;
-extern bool test_attr__enabled;
-void test_attr__ready(void);
-void test_attr__init(void);
-void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
- int fd, int group_fd, unsigned long flags);
-
-#ifndef HAVE_ATTR_TEST
-#define HAVE_ATTR_TEST 1
-#endif
-
static inline int
sys_perf_event_open(struct perf_event_attr *attr,
pid_t pid, int cpu, int group_fd,
unsigned long flags)
{
- int fd;
-
- fd = syscall(__NR_perf_event_open, attr, pid, cpu,
- group_fd, flags);
-
-#if HAVE_ATTR_TEST
- if (unlikely(test_attr__enabled))
- test_attr__open(attr, pid, cpu, fd, group_fd, flags);
-#endif
- return fd;
+ return syscall(__NR_perf_event_open, attr, pid, cpu,
+ group_fd, flags);
}
#endif /* _PERF_SYS_H */
diff --git a/tools/perf/pmu-events/arch/powerpc/power8/cache.json b/tools/perf/pmu-events/arch/powerpc/power8/cache.json
index 6b792b2c87e2..05a17084d939 100644
--- a/tools/perf/pmu-events/arch/powerpc/power8/cache.json
+++ b/tools/perf/pmu-events/arch/powerpc/power8/cache.json
@@ -32,8 +32,8 @@
{
"EventCode": "0x1c04e",
"EventName": "PM_DATA_FROM_L2MISS_MOD",
- "BriefDescription": "The processor's data cache was reloaded from a localtion other than the local core's L2 due to a demand load",
- "PublicDescription": "The processor's data cache was reloaded from a localtion other than the local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
},
{
"EventCode": "0x3c040",
@@ -74,8 +74,8 @@
{
"EventCode": "0x4c04e",
"EventName": "PM_DATA_FROM_L3MISS_MOD",
- "BriefDescription": "The processor's data cache was reloaded from a localtion other than the local core's L3 due to a demand load",
- "PublicDescription": "The processor's data cache was reloaded from a localtion other than the local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
},
{
"EventCode": "0x3c042",
@@ -134,7 +134,7 @@
{
"EventCode": "0x4e04e",
"EventName": "PM_DPTEG_FROM_L3MISS",
- "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L3 due to a data side request",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a data side request",
"PublicDescription": ""
},
{
diff --git a/tools/perf/pmu-events/arch/powerpc/power8/frontend.json b/tools/perf/pmu-events/arch/powerpc/power8/frontend.json
index 1ddc30655d43..1c902a8263b6 100644
--- a/tools/perf/pmu-events/arch/powerpc/power8/frontend.json
+++ b/tools/perf/pmu-events/arch/powerpc/power8/frontend.json
@@ -116,8 +116,8 @@
{
"EventCode": "0x1404e",
"EventName": "PM_INST_FROM_L2MISS",
- "BriefDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L2 due to an instruction fetch (not prefetch)",
- "PublicDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
},
{
"EventCode": "0x34040",
@@ -158,8 +158,8 @@
{
"EventCode": "0x4404e",
"EventName": "PM_INST_FROM_L3MISS_MOD",
- "BriefDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L3 due to a instruction fetch",
- "PublicDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to a instruction fetch",
+ "PublicDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
},
{
"EventCode": "0x34042",
@@ -320,7 +320,7 @@
{
"EventCode": "0x1504e",
"EventName": "PM_IPTEG_FROM_L2MISS",
- "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L2 due to a instruction side request",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a instruction side request",
"PublicDescription": ""
},
{
@@ -344,7 +344,7 @@
{
"EventCode": "0x4504e",
"EventName": "PM_IPTEG_FROM_L3MISS",
- "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L3 due to a instruction side request",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a instruction side request",
"PublicDescription": ""
},
{
diff --git a/tools/perf/pmu-events/arch/powerpc/power8/marked.json b/tools/perf/pmu-events/arch/powerpc/power8/marked.json
index 94dc58b83b7e..6de61a797bbd 100644
--- a/tools/perf/pmu-events/arch/powerpc/power8/marked.json
+++ b/tools/perf/pmu-events/arch/powerpc/power8/marked.json
@@ -92,7 +92,7 @@
{
"EventCode": "0x4c12e",
"EventName": "PM_MRK_DATA_FROM_L2MISS_CYC",
- "BriefDescription": "Duration in cycles to reload from a localtion other than the local core's L2 due to a marked load",
+ "BriefDescription": "Duration in cycles to reload from a location other than the local core's L2 due to a marked load",
"PublicDescription": ""
},
{
@@ -158,13 +158,13 @@
{
"EventCode": "0x201e4",
"EventName": "PM_MRK_DATA_FROM_L3MISS",
- "BriefDescription": "The processor's data cache was reloaded from a localtion other than the local core's L3 due to a marked load",
+ "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to a marked load",
"PublicDescription": ""
},
{
"EventCode": "0x2d12e",
"EventName": "PM_MRK_DATA_FROM_L3MISS_CYC",
- "BriefDescription": "Duration in cycles to reload from a localtion other than the local core's L3 due to a marked load",
+ "BriefDescription": "Duration in cycles to reload from a location other than the local core's L3 due to a marked load",
"PublicDescription": ""
},
{
@@ -392,7 +392,7 @@
{
"EventCode": "0x1f14e",
"EventName": "PM_MRK_DPTEG_FROM_L2MISS",
- "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L2 due to a marked data side request",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a marked data side request",
"PublicDescription": ""
},
{
@@ -416,7 +416,7 @@
{
"EventCode": "0x4f14e",
"EventName": "PM_MRK_DPTEG_FROM_L3MISS",
- "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L3 due to a marked data side request",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a marked data side request",
"PublicDescription": ""
},
{
diff --git a/tools/perf/pmu-events/arch/powerpc/power8/other.json b/tools/perf/pmu-events/arch/powerpc/power8/other.json
index f4e760cab111..84a0cedf1fd9 100644
--- a/tools/perf/pmu-events/arch/powerpc/power8/other.json
+++ b/tools/perf/pmu-events/arch/powerpc/power8/other.json
@@ -410,8 +410,8 @@
{
"EventCode": "0x61c04e",
"EventName": "PM_DATA_ALL_FROM_L2MISS_MOD",
- "BriefDescription": "The processor's data cache was reloaded from a localtion other than the local core's L2 due to either demand loads or data prefetch",
- "PublicDescription": "The processor's data cache was reloaded from a localtion other than the local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
},
{
"EventCode": "0x63c040",
@@ -470,8 +470,8 @@
{
"EventCode": "0x64c04e",
"EventName": "PM_DATA_ALL_FROM_L3MISS_MOD",
- "BriefDescription": "The processor's data cache was reloaded from a localtion other than the local core's L3 due to either demand loads or data prefetch",
- "PublicDescription": "The processor's data cache was reloaded from a localtion other than the local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
},
{
"EventCode": "0x63c042",
@@ -1280,8 +1280,8 @@
{
"EventCode": "0x51404e",
"EventName": "PM_INST_ALL_FROM_L2MISS",
- "BriefDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L2 due to instruction fetches and prefetches",
- "PublicDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
},
{
"EventCode": "0x534040",
@@ -1340,8 +1340,8 @@
{
"EventCode": "0x54404e",
"EventName": "PM_INST_ALL_FROM_L3MISS_MOD",
- "BriefDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L3 due to a instruction fetch",
- "PublicDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to a instruction fetch",
+ "PublicDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
},
{
"EventCode": "0x534042",
diff --git a/tools/perf/pmu-events/arch/powerpc/power8/translation.json b/tools/perf/pmu-events/arch/powerpc/power8/translation.json
index 623e7475b010..a1657f5fdc6b 100644
--- a/tools/perf/pmu-events/arch/powerpc/power8/translation.json
+++ b/tools/perf/pmu-events/arch/powerpc/power8/translation.json
@@ -44,7 +44,7 @@
{
"EventCode": "0x1e04e",
"EventName": "PM_DPTEG_FROM_L2MISS",
- "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L2 due to a data side request",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a data side request",
"PublicDescription": ""
},
{
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json
index 8383a37647ad..7a5d1bf543f8 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json
@@ -1,37 +1,46 @@
[
{
- "MetricExpr": "(hv_24x7@PM_MCS01_128B_RD_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS01_128B_RD_DISP_PORT23\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_RD_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_RD_DISP_PORT23\\,chip\\=?@)",
- "MetricName": "Memory_RD_BW_Chip",
- "MetricGroup": "Memory_BW",
- "ScaleUnit": "1.6e-2MB"
+ "MetricExpr": "(hv_24x7@PM_MCS01_128B_RD_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS01_128B_RD_DISP_PORT23\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_RD_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_RD_DISP_PORT23\\,chip\\=?@)",
+ "MetricName": "Memory_RD_BW_Chip",
+ "MetricGroup": "Memory_BW",
+ "ScaleUnit": "1.6e-2MB",
+ "AggregationMode": "PerChip"
},
{
"MetricExpr": "(hv_24x7@PM_MCS01_128B_WR_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS01_128B_WR_DISP_PORT23\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_WR_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_WR_DISP_PORT23\\,chip\\=?@ )",
- "MetricName": "Memory_WR_BW_Chip",
- "MetricGroup": "Memory_BW",
- "ScaleUnit": "1.6e-2MB"
+ "MetricName": "Memory_WR_BW_Chip",
+ "MetricGroup": "Memory_BW",
+ "ScaleUnit": "1.6e-2MB",
+ "AggregationMode": "PerChip"
},
{
"MetricExpr": "(hv_24x7@PM_PB_CYC\\,chip\\=?@ )",
- "MetricName": "PowerBUS_Frequency",
- "ScaleUnit": "2.5e-7GHz"
+ "MetricName": "PowerBUS_Frequency",
+ "ScaleUnit": "2.5e-7GHz",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricExpr": "(hv_24x7@CPM_CS_32MHZ_CYC\\,domain\\=3\\,core\\=?@ )",
+ "MetricName": "CPM_CS_32MHZ_CYC",
+ "ScaleUnit": "1MHz",
+ "AggregationMode": "PerCore"
},
{
"MetricExpr" : "nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT23@",
"MetricName" : "mcs01-read",
- "MetricGroup" : "memory_bw",
+ "MetricGroup" : "memory-bandwidth",
"ScaleUnit": "6.1e-5MB"
},
{
"MetricExpr" : "nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT23@",
"MetricName" : "mcs23-read",
- "MetricGroup" : "memory_bw",
+ "MetricGroup" : "memory-bandwidth",
"ScaleUnit": "6.1e-5MB"
},
{
"MetricExpr" : "nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT23@",
"MetricName" : "mcs01-write",
- "MetricGroup" : "memory_bw",
+ "MetricGroup" : "memory-bandwidth",
"ScaleUnit": "6.1e-5MB"
},
{
@@ -48,7 +57,7 @@
{
"MetricExpr" : "(nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT23@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT23@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT23@ + nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT23@)",
"MetricName" : "Memory-bandwidth-MCS",
- "MetricGroup" : "memory_bw",
+ "MetricGroup" : "memory-bandwidth",
"ScaleUnit": "6.1e-5MB"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/branch.json b/tools/perf/pmu-events/arch/x86/amdzen1/branch.json
index a9943eeb8d6b..4ceb67a0db21 100644
--- a/tools/perf/pmu-events/arch/x86/amdzen1/branch.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/branch.json
@@ -19,5 +19,10 @@
"EventName": "bp_de_redirect",
"EventCode": "0x91",
"BriefDescription": "Decoder Overrides Existing Branch Prediction (speculative)."
+ },
+ {
+ "EventName": "bp_l1_tlb_fetch_hit",
+ "EventCode": "0x94",
+ "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB."
}
]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json
index 404d4c569c01..4ea7ec4f496e 100644
--- a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json
@@ -118,6 +118,11 @@
"UMask": "0x1"
},
{
+ "EventName": "l2_request_g1.all_no_prefetch",
+ "EventCode": "0x60",
+ "UMask": "0xf9"
+ },
+ {
"EventName": "l2_request_g2.group1",
"EventCode": "0x61",
"BriefDescription": "Miscellaneous events covered in more detail by l2_request_g1 (PMCx060).",
@@ -244,12 +249,48 @@
"UMask": "0x1"
},
{
+ "EventName": "l2_cache_req_stat.ic_access_in_l2",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache requests in L2.",
+ "UMask": "0x7"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ic_dc_miss_in_l2",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2 and Data cache request miss in L2 (all types).",
+ "UMask": "0x9"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ic_dc_hit_in_l2",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request hit in L2 and Data cache request hit in L2 (all types).",
+ "UMask": "0xf6"
+ },
+ {
"EventName": "l2_fill_pending.l2_fill_busy",
"EventCode": "0x6d",
"BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.",
"UMask": "0x1"
},
{
+ "EventName": "l2_pf_hit_l2",
+ "EventCode": "0x70",
+ "BriefDescription": "L2 prefetch hit in L2.",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "l2_pf_miss_l2_hit_l3",
+ "EventCode": "0x71",
+ "BriefDescription": "L2 prefetcher hits in L3. Counts all L2 prefetches accepted by the L2 pipeline which miss the L2 cache and hit the L3.",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "l2_pf_miss_l2_l3",
+ "EventCode": "0x72",
+ "BriefDescription": "L2 prefetcher misses in L3. All L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3 caches.",
+ "UMask": "0xff"
+ },
+ {
"EventName": "l3_request_g1.caching_l3_cache_accesses",
"EventCode": "0x01",
"BriefDescription": "Caching: L3 cache accesses",
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/data-fabric.json b/tools/perf/pmu-events/arch/x86/amdzen1/data-fabric.json
new file mode 100644
index 000000000000..40271df40015
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/data-fabric.json
@@ -0,0 +1,98 @@
+[
+ {
+ "EventName": "remote_outbound_data_controller_0",
+ "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 0",
+ "EventCode": "0x7c7",
+ "UMask": "0x02",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "remote_outbound_data_controller_1",
+ "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 1",
+ "EventCode": "0x807",
+ "UMask": "0x02",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "remote_outbound_data_controller_2",
+ "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 2",
+ "EventCode": "0x847",
+ "UMask": "0x02",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "remote_outbound_data_controller_3",
+ "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 3",
+ "EventCode": "0x887",
+ "UMask": "0x02",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_0",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x07",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_1",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x47",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_2",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x87",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_3",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0xc7",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_4",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x107",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_5",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x147",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_6",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x187",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_7",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x1c7",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json
new file mode 100644
index 000000000000..2cfe2d2f3bfd
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json
@@ -0,0 +1,178 @@
+[
+ {
+ "MetricName": "branch_misprediction_ratio",
+ "BriefDescription": "Execution-Time Branch Misprediction Ratio (Non-Speculative)",
+ "MetricExpr": "d_ratio(ex_ret_brn_misp, ex_ret_brn)",
+ "MetricGroup": "branch_prediction",
+ "ScaleUnit": "100%"
+ },
+ {
+ "EventName": "all_dc_accesses",
+ "EventCode": "0x29",
+ "BriefDescription": "All L1 Data Cache Accesses",
+ "UMask": "0x7"
+ },
+ {
+ "MetricName": "all_l2_cache_accesses",
+ "BriefDescription": "All L2 Cache Accesses",
+ "MetricExpr": "l2_request_g1.all_no_prefetch + l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3",
+ "MetricGroup": "l2_cache"
+ },
+ {
+ "EventName": "l2_cache_accesses_from_ic_misses",
+ "EventCode": "0x60",
+ "BriefDescription": "L2 Cache Accesses from L1 Instruction Cache Misses (including prefetch)",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "l2_cache_accesses_from_dc_misses",
+ "EventCode": "0x60",
+ "BriefDescription": "L2 Cache Accesses from L1 Data Cache Misses (including prefetch)",
+ "UMask": "0xc8"
+ },
+ {
+ "MetricName": "l2_cache_accesses_from_l2_hwpf",
+ "BriefDescription": "L2 Cache Accesses from L2 HWPF",
+ "MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3",
+ "MetricGroup": "l2_cache"
+ },
+ {
+ "MetricName": "all_l2_cache_misses",
+ "BriefDescription": "All L2 Cache Misses",
+ "MetricExpr": "l2_cache_req_stat.ic_dc_miss_in_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3",
+ "MetricGroup": "l2_cache"
+ },
+ {
+ "EventName": "l2_cache_misses_from_ic_miss",
+ "EventCode": "0x64",
+ "BriefDescription": "L2 Cache Misses from L1 Instruction Cache Misses",
+ "UMask": "0x01"
+ },
+ {
+ "EventName": "l2_cache_misses_from_dc_misses",
+ "EventCode": "0x64",
+ "BriefDescription": "L2 Cache Misses from L1 Data Cache Misses",
+ "UMask": "0x08"
+ },
+ {
+ "MetricName": "l2_cache_misses_from_l2_hwpf",
+ "BriefDescription": "L2 Cache Misses from L2 HWPF",
+ "MetricExpr": "l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3",
+ "MetricGroup": "l2_cache"
+ },
+ {
+ "MetricName": "all_l2_cache_hits",
+ "BriefDescription": "All L2 Cache Hits",
+ "MetricExpr": "l2_cache_req_stat.ic_dc_hit_in_l2 + l2_pf_hit_l2",
+ "MetricGroup": "l2_cache"
+ },
+ {
+ "EventName": "l2_cache_hits_from_ic_misses",
+ "EventCode": "0x64",
+ "BriefDescription": "L2 Cache Hits from L1 Instruction Cache Misses",
+ "UMask": "0x06"
+ },
+ {
+ "EventName": "l2_cache_hits_from_dc_misses",
+ "EventCode": "0x64",
+ "BriefDescription": "L2 Cache Hits from L1 Data Cache Misses",
+ "UMask": "0x70"
+ },
+ {
+ "MetricName": "l2_cache_hits_from_l2_hwpf",
+ "BriefDescription": "L2 Cache Hits from L2 HWPF",
+ "MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3",
+ "MetricGroup": "l2_cache"
+ },
+ {
+ "EventName": "l3_accesses",
+ "EventCode": "0x04",
+ "BriefDescription": "L3 Accesses",
+ "UMask": "0xff",
+ "Unit": "L3PMC"
+ },
+ {
+ "EventName": "l3_misses",
+ "EventCode": "0x04",
+ "BriefDescription": "L3 Misses (includes Chg2X)",
+ "UMask": "0x01",
+ "Unit": "L3PMC"
+ },
+ {
+ "MetricName": "l3_read_miss_latency",
+ "BriefDescription": "Average L3 Read Miss Latency (in core clocks)",
+ "MetricExpr": "(xi_sys_fill_latency * 16) / xi_ccx_sdp_req1.all_l3_miss_req_typs",
+ "MetricGroup": "l3_cache",
+ "ScaleUnit": "1core clocks"
+ },
+ {
+ "MetricName": "ic_fetch_miss_ratio",
+ "BriefDescription": "L1 Instruction Cache (32B) Fetch Miss Ratio",
+ "MetricExpr": "d_ratio(l2_cache_req_stat.ic_access_in_l2, bp_l1_tlb_fetch_hit + bp_l1_tlb_miss_l2_hit + bp_l1_tlb_miss_l2_miss)",
+ "MetricGroup": "l2_cache",
+ "ScaleUnit": "100%"
+ },
+ {
+ "MetricName": "l1_itlb_misses",
+ "BriefDescription": "L1 ITLB Misses",
+ "MetricExpr": "bp_l1_tlb_miss_l2_hit + bp_l1_tlb_miss_l2_miss",
+ "MetricGroup": "tlb"
+ },
+ {
+ "EventName": "l2_itlb_misses",
+ "EventCode": "0x85",
+ "BriefDescription": "L2 ITLB Misses & Instruction page walks",
+ "UMask": "0x07"
+ },
+ {
+ "EventName": "l1_dtlb_misses",
+ "EventCode": "0x45",
+ "BriefDescription": "L1 DTLB Misses",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "l2_dtlb_misses",
+ "EventCode": "0x45",
+ "BriefDescription": "L2 DTLB Misses & Data page walks",
+ "UMask": "0xf0"
+ },
+ {
+ "EventName": "all_tlbs_flushed",
+ "EventCode": "0x78",
+ "BriefDescription": "All TLBs Flushed",
+ "UMask": "0xdf"
+ },
+ {
+ "EventName": "uops_dispatched",
+ "EventCode": "0xaa",
+ "BriefDescription": "Micro-ops Dispatched",
+ "UMask": "0x03"
+ },
+ {
+ "EventName": "sse_avx_stalls",
+ "EventCode": "0x0e",
+ "BriefDescription": "Mixed SSE/AVX Stalls",
+ "UMask": "0x0e"
+ },
+ {
+ "EventName": "uops_retired",
+ "EventCode": "0xc1",
+ "BriefDescription": "Micro-ops Retired"
+ },
+ {
+ "MetricName": "all_remote_links_outbound",
+ "BriefDescription": "Approximate: Outbound data bytes for all Remote Links for a node (die)",
+ "MetricExpr": "remote_outbound_data_controller_0 + remote_outbound_data_controller_1 + remote_outbound_data_controller_2 + remote_outbound_data_controller_3",
+ "MetricGroup": "data_fabric",
+ "PerPkg": "1",
+ "ScaleUnit": "3e-5MiB"
+ },
+ {
+ "MetricName": "nps1_die_to_dram",
+ "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die) (may need --metric-no-group)",
+ "MetricExpr": "dram_channel_data_controller_0 + dram_channel_data_controller_1 + dram_channel_data_controller_2 + dram_channel_data_controller_3 + dram_channel_data_controller_4 + dram_channel_data_controller_5 + dram_channel_data_controller_6 + dram_channel_data_controller_7",
+ "MetricGroup": "data_fabric",
+ "PerPkg": "1",
+ "ScaleUnit": "6.1e-5MiB"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/cache.json b/tools/perf/pmu-events/arch/x86/amdzen2/cache.json
index 1c60bfa0f00b..f61b982f83ca 100644
--- a/tools/perf/pmu-events/arch/x86/amdzen2/cache.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen2/cache.json
@@ -48,6 +48,11 @@
"UMask": "0x1"
},
{
+ "EventName": "l2_request_g1.all_no_prefetch",
+ "EventCode": "0x60",
+ "UMask": "0xf9"
+ },
+ {
"EventName": "l2_request_g2.group1",
"EventCode": "0x61",
"BriefDescription": "Miscellaneous events covered in more detail by l2_request_g1 (PMCx060).",
@@ -174,6 +179,24 @@
"UMask": "0x1"
},
{
+ "EventName": "l2_cache_req_stat.ic_access_in_l2",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache requests in L2.",
+ "UMask": "0x7"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ic_dc_miss_in_l2",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2 and Data cache request miss in L2 (all types).",
+ "UMask": "0x9"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ic_dc_hit_in_l2",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request hit in L2 and Data cache request hit in L2 (all types).",
+ "UMask": "0xf6"
+ },
+ {
"EventName": "l2_fill_pending.l2_fill_busy",
"EventCode": "0x6d",
"BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.",
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/data-fabric.json b/tools/perf/pmu-events/arch/x86/amdzen2/data-fabric.json
new file mode 100644
index 000000000000..40271df40015
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen2/data-fabric.json
@@ -0,0 +1,98 @@
+[
+ {
+ "EventName": "remote_outbound_data_controller_0",
+ "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 0",
+ "EventCode": "0x7c7",
+ "UMask": "0x02",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "remote_outbound_data_controller_1",
+ "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 1",
+ "EventCode": "0x807",
+ "UMask": "0x02",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "remote_outbound_data_controller_2",
+ "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 2",
+ "EventCode": "0x847",
+ "UMask": "0x02",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "remote_outbound_data_controller_3",
+ "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 3",
+ "EventCode": "0x887",
+ "UMask": "0x02",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_0",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x07",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_1",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x47",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_2",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x87",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_3",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0xc7",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_4",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x107",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_5",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x147",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_6",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x187",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_7",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x1c7",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json
new file mode 100644
index 000000000000..2ef91e25e661
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json
@@ -0,0 +1,178 @@
+[
+ {
+ "MetricName": "branch_misprediction_ratio",
+ "BriefDescription": "Execution-Time Branch Misprediction Ratio (Non-Speculative)",
+ "MetricExpr": "d_ratio(ex_ret_brn_misp, ex_ret_brn)",
+ "MetricGroup": "branch_prediction",
+ "ScaleUnit": "100%"
+ },
+ {
+ "EventName": "all_dc_accesses",
+ "EventCode": "0x29",
+ "BriefDescription": "All L1 Data Cache Accesses",
+ "UMask": "0x7"
+ },
+ {
+ "MetricName": "all_l2_cache_accesses",
+ "BriefDescription": "All L2 Cache Accesses",
+ "MetricExpr": "l2_request_g1.all_no_prefetch + l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3",
+ "MetricGroup": "l2_cache"
+ },
+ {
+ "EventName": "l2_cache_accesses_from_ic_misses",
+ "EventCode": "0x60",
+ "BriefDescription": "L2 Cache Accesses from L1 Instruction Cache Misses (including prefetch)",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "l2_cache_accesses_from_dc_misses",
+ "EventCode": "0x60",
+ "BriefDescription": "L2 Cache Accesses from L1 Data Cache Misses (including prefetch)",
+ "UMask": "0xc8"
+ },
+ {
+ "MetricName": "l2_cache_accesses_from_l2_hwpf",
+ "BriefDescription": "L2 Cache Accesses from L2 HWPF",
+ "MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3",
+ "MetricGroup": "l2_cache"
+ },
+ {
+ "MetricName": "all_l2_cache_misses",
+ "BriefDescription": "All L2 Cache Misses",
+ "MetricExpr": "l2_cache_req_stat.ic_dc_miss_in_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3",
+ "MetricGroup": "l2_cache"
+ },
+ {
+ "EventName": "l2_cache_misses_from_ic_miss",
+ "EventCode": "0x64",
+ "BriefDescription": "L2 Cache Misses from L1 Instruction Cache Misses",
+ "UMask": "0x01"
+ },
+ {
+ "EventName": "l2_cache_misses_from_dc_misses",
+ "EventCode": "0x64",
+ "BriefDescription": "L2 Cache Misses from L1 Data Cache Misses",
+ "UMask": "0x08"
+ },
+ {
+ "MetricName": "l2_cache_misses_from_l2_hwpf",
+ "BriefDescription": "L2 Cache Misses from L2 HWPF",
+ "MetricExpr": "l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3",
+ "MetricGroup": "l2_cache"
+ },
+ {
+ "MetricName": "all_l2_cache_hits",
+ "BriefDescription": "All L2 Cache Hits",
+ "MetricExpr": "l2_cache_req_stat.ic_dc_hit_in_l2 + l2_pf_hit_l2",
+ "MetricGroup": "l2_cache"
+ },
+ {
+ "EventName": "l2_cache_hits_from_ic_misses",
+ "EventCode": "0x64",
+ "BriefDescription": "L2 Cache Hits from L1 Instruction Cache Misses",
+ "UMask": "0x06"
+ },
+ {
+ "EventName": "l2_cache_hits_from_dc_misses",
+ "EventCode": "0x64",
+ "BriefDescription": "L2 Cache Hits from L1 Data Cache Misses",
+ "UMask": "0x70"
+ },
+ {
+ "MetricName": "l2_cache_hits_from_l2_hwpf",
+ "BriefDescription": "L2 Cache Hits from L2 HWPF",
+ "MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3",
+ "MetricGroup": "l2_cache"
+ },
+ {
+ "EventName": "l3_accesses",
+ "EventCode": "0x04",
+ "BriefDescription": "L3 Accesses",
+ "UMask": "0xff",
+ "Unit": "L3PMC"
+ },
+ {
+ "EventName": "l3_misses",
+ "EventCode": "0x04",
+ "BriefDescription": "L3 Misses (includes Chg2X)",
+ "UMask": "0x01",
+ "Unit": "L3PMC"
+ },
+ {
+ "MetricName": "l3_read_miss_latency",
+ "BriefDescription": "Average L3 Read Miss Latency (in core clocks)",
+ "MetricExpr": "(xi_sys_fill_latency * 16) / xi_ccx_sdp_req1.all_l3_miss_req_typs",
+ "MetricGroup": "l3_cache",
+ "ScaleUnit": "1core clocks"
+ },
+ {
+ "MetricName": "ic_fetch_miss_ratio",
+ "BriefDescription": "L1 Instruction Cache (32B) Fetch Miss Ratio",
+ "MetricExpr": "d_ratio(l2_cache_req_stat.ic_access_in_l2, bp_l1_tlb_fetch_hit + bp_l1_tlb_miss_l2_hit + bp_l1_tlb_miss_l2_tlb_miss)",
+ "MetricGroup": "l2_cache",
+ "ScaleUnit": "100%"
+ },
+ {
+ "MetricName": "l1_itlb_misses",
+ "BriefDescription": "L1 ITLB Misses",
+ "MetricExpr": "bp_l1_tlb_miss_l2_hit + bp_l1_tlb_miss_l2_tlb_miss",
+ "MetricGroup": "tlb"
+ },
+ {
+ "EventName": "l2_itlb_misses",
+ "EventCode": "0x85",
+ "BriefDescription": "L2 ITLB Misses & Instruction page walks",
+ "UMask": "0x07"
+ },
+ {
+ "EventName": "l1_dtlb_misses",
+ "EventCode": "0x45",
+ "BriefDescription": "L1 DTLB Misses",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "l2_dtlb_misses",
+ "EventCode": "0x45",
+ "BriefDescription": "L2 DTLB Misses & Data page walks",
+ "UMask": "0xf0"
+ },
+ {
+ "EventName": "all_tlbs_flushed",
+ "EventCode": "0x78",
+ "BriefDescription": "All TLBs Flushed",
+ "UMask": "0xdf"
+ },
+ {
+ "EventName": "uops_dispatched",
+ "EventCode": "0xaa",
+ "BriefDescription": "Micro-ops Dispatched",
+ "UMask": "0x03"
+ },
+ {
+ "EventName": "sse_avx_stalls",
+ "EventCode": "0x0e",
+ "BriefDescription": "Mixed SSE/AVX Stalls",
+ "UMask": "0x0e"
+ },
+ {
+ "EventName": "uops_retired",
+ "EventCode": "0xc1",
+ "BriefDescription": "Micro-ops Retired"
+ },
+ {
+ "MetricName": "all_remote_links_outbound",
+ "BriefDescription": "Approximate: Outbound data bytes for all Remote Links for a node (die)",
+ "MetricExpr": "remote_outbound_data_controller_0 + remote_outbound_data_controller_1 + remote_outbound_data_controller_2 + remote_outbound_data_controller_3",
+ "MetricGroup": "data_fabric",
+ "PerPkg": "1",
+ "ScaleUnit": "3e-5MiB"
+ },
+ {
+ "MetricName": "nps1_die_to_dram",
+ "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die) (may need --metric-no-group)",
+ "MetricExpr": "dram_channel_data_controller_0 + dram_channel_data_controller_1 + dram_channel_data_controller_2 + dram_channel_data_controller_3 + dram_channel_data_controller_4 + dram_channel_data_controller_5 + dram_channel_data_controller_6 + dram_channel_data_controller_7",
+ "MetricGroup": "data_fabric",
+ "PerPkg": "1",
+ "ScaleUnit": "6.1e-5MiB"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/cache.json b/tools/perf/pmu-events/arch/x86/cascadelakex/cache.json
index 3fba310a5012..3c0f5837480f 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/cache.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/cache.json
@@ -8064,6 +8064,20 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
+ "Deprecated": "1",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1000020004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
@@ -9256,20 +9270,6 @@
"UMask": "0x1"
},
{
- "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
- "Counter": "0,1,2,3",
- "CounterHTOff": "0,1,2,3",
- "Deprecated": "1",
- "EventCode": "0xB7, 0xBB",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x1000020004",
- "Offcore": "1",
- "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
- "SampleAfterValue": "100003",
- "UMask": "0x1"
- },
- {
"BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
index d25eebce34c9..de3193552277 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
@@ -4,14 +4,14 @@
"MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)",
"MetricGroup": "TopdownL1",
"MetricName": "Frontend_Bound",
- "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound."
+ "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound."
},
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
- "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
+ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
"MetricGroup": "TopdownL1_SMT",
"MetricName": "Frontend_Bound_SMT",
- "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
+ "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
@@ -22,13 +22,14 @@
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.",
- "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
+ "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
"MetricGroup": "TopdownL1_SMT",
"MetricName": "Bad_Speculation_SMT",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU."
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
+ "MetricConstraint": "NO_NMI_WATCHDOG",
"MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )",
"MetricGroup": "TopdownL1",
"MetricName": "Backend_Bound",
@@ -36,7 +37,7 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
- "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )",
+ "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) )",
"MetricGroup": "TopdownL1_SMT",
"MetricName": "Backend_Bound_SMT",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
@@ -50,7 +51,7 @@
},
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
"MetricGroup": "TopdownL1_SMT",
"MetricName": "Retiring_SMT",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU."
@@ -58,7 +59,7 @@
{
"BriefDescription": "Instructions Per Cycle (per Logical Processor)",
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "TopDownL1",
+ "MetricGroup": "Summary",
"MetricName": "IPC"
},
{
@@ -74,24 +75,6 @@
"MetricName": "IpTB"
},
{
- "BriefDescription": "Branch instructions per taken branch. ",
- "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
- "MetricGroup": "Branches;PGO",
- "MetricName": "BpTB"
- },
- {
- "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions",
- "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1 ) )",
- "MetricGroup": "PGO;IcMiss",
- "MetricName": "IFetch_Line_Utilization"
- },
- {
- "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
- "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
- "MetricGroup": "DSB;Fetch_BW",
- "MetricName": "DSB_Coverage"
- },
- {
"BriefDescription": "Cycles Per Instruction (per Logical Processor)",
"MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
"MetricGroup": "Pipeline;Summary",
@@ -104,86 +87,110 @@
"MetricName": "CLKS"
},
{
- "BriefDescription": "Total issue-pipeline slots (per-Physical Core)",
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
"MetricExpr": "4 * cycles",
"MetricGroup": "TopDownL1",
"MetricName": "SLOTS"
},
{
- "BriefDescription": "Total issue-pipeline slots (per-Physical Core)",
- "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
"MetricGroup": "TopDownL1_SMT",
"MetricName": "SLOTS_SMT"
},
{
- "BriefDescription": "Instructions per Load (lower number means higher occurance rate)",
+ "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
"MetricGroup": "Instruction_Type",
- "MetricName": "IpL"
+ "MetricName": "IpLoad"
},
{
- "BriefDescription": "Instructions per Store (lower number means higher occurance rate)",
+ "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
"MetricGroup": "Instruction_Type",
- "MetricName": "IpS"
+ "MetricName": "IpStore"
},
{
- "BriefDescription": "Instructions per Branch (lower number means higher occurance rate)",
+ "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Branches;Instruction_Type",
- "MetricName": "IpB"
+ "MetricName": "IpBranch"
},
{
- "BriefDescription": "Instruction per (near) call (lower number means higher occurance rate)",
+ "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
"MetricGroup": "Branches",
"MetricName": "IpCall"
},
{
+ "BriefDescription": "Branch instructions per taken branch. ",
+ "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;PGO",
+ "MetricName": "BpTkBranch"
+ },
+ {
+ "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )",
+ "MetricGroup": "FLOPS;FP_Arith;Instruction_Type",
+ "MetricName": "IpFLOP"
+ },
+ {
"BriefDescription": "Total number of retired Instructions",
"MetricExpr": "INST_RETIRED.ANY",
- "MetricGroup": "Summary",
+ "MetricGroup": "Summary;TopDownL1",
"MetricName": "Instructions"
},
{
+ "BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)",
+ "MetricExpr": "LSD.UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
+ "MetricGroup": "LSD",
+ "MetricName": "LSD_Coverage"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
+ "MetricGroup": "DSB;Fetch_BW",
+ "MetricName": "DSB_Coverage"
+ },
+ {
"BriefDescription": "Instructions Per Cycle (per physical core)",
"MetricExpr": "INST_RETIRED.ANY / cycles",
- "MetricGroup": "SMT",
+ "MetricGroup": "SMT;TopDownL1",
"MetricName": "CoreIPC"
},
{
"BriefDescription": "Instructions Per Cycle (per physical core)",
- "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
- "MetricGroup": "SMT",
+ "MetricExpr": "INST_RETIRED.ANY / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
+ "MetricGroup": "SMT;TopDownL1",
"MetricName": "CoreIPC_SMT"
},
{
"BriefDescription": "Floating Point Operations Per Cycle",
- "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / cycles",
+ "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / cycles",
"MetricGroup": "FLOPS",
"MetricName": "FLOPc"
},
{
"BriefDescription": "Floating Point Operations Per Cycle",
- "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+ "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
"MetricGroup": "FLOPS_SMT",
"MetricName": "FLOPc_SMT"
},
{
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
- "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
- "MetricGroup": "Pipeline",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 )",
+ "MetricGroup": "Pipeline;Ports_Utilization",
"MetricName": "ILP"
},
{
"BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)",
- "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES",
"MetricGroup": "BrMispredicts",
"MetricName": "Branch_Misprediction_Cost"
},
{
"BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)",
- "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) * (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / BR_MISP_RETIRED.ALL_BRANCHES",
"MetricGroup": "BrMispredicts_SMT",
"MetricName": "Branch_Misprediction_Cost_SMT"
},
@@ -213,14 +220,14 @@
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricConstraint": "NO_NMI_WATCHDOG",
"MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )",
"MetricGroup": "TLB",
- "MetricName": "Page_Walks_Utilization",
- "MetricConstraint": "NO_NMI_WATCHDOG"
+ "MetricName": "Page_Walks_Utilization"
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
- "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) )",
+ "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )",
"MetricGroup": "TLB_SMT",
"MetricName": "Page_Walks_Utilization_SMT"
},
@@ -245,7 +252,7 @@
{
"BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time",
- "MetricGroup": "Memory_BW",
+ "MetricGroup": "Memory_BW;Offcore",
"MetricName": "L3_Cache_Access_BW"
},
{
@@ -263,7 +270,7 @@
{
"BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)",
"MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY",
- "MetricGroup": "Cache_Misses",
+ "MetricGroup": "Cache_Misses;Offcore",
"MetricName": "L2MPKI_All"
},
{
@@ -298,7 +305,7 @@
},
{
"BriefDescription": "Giga Floating Point Operations Per Second",
- "MetricExpr": "( (( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / 1000000000 ) / duration_time",
+ "MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / 1000000000 ) / duration_time",
"MetricGroup": "FLOPS;Summary",
"MetricName": "GFLOPs"
},
@@ -310,62 +317,74 @@
},
{
"BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
- "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 )",
"MetricGroup": "SMT;Summary",
"MetricName": "SMT_2T_Utilization"
},
{
- "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
"MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "Summary",
+ "MetricGroup": "OS",
"MetricName": "Kernel_Utilization"
},
{
"BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
"MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time",
- "MetricGroup": "Memory_BW",
+ "MetricGroup": "Memory_BW;SoC",
"MetricName": "DRAM_BW_Use"
},
{
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches",
- "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x35\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )",
- "MetricGroup": "Memory_Lat",
- "MetricName": "DRAM_Read_Latency"
+ "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x35\\,umask\\=0x21\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )",
+ "MetricGroup": "Memory_Lat;SoC",
+ "MetricName": "MEM_Read_Latency"
},
{
"BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
- "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x36\\,umask\\=0x21\\,thresh\\=1@",
- "MetricGroup": "Memory_BW",
- "MetricName": "DRAM_Parallel_Reads"
+ "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433\\,thresh\\=1@",
+ "MetricGroup": "Memory_BW;SoC",
+ "MetricName": "MEM_Parallel_Reads"
},
{
"BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches",
"MetricExpr": "( 1000000000 * ( imc@event\\=0xe0\\,umask\\=0x1@ / imc@event\\=0xe3@ ) / imc_0@event\\=0x0@ )",
- "MetricGroup": "Memory_Lat",
+ "MetricGroup": "Memory_Lat;SoC;Server",
"MetricName": "MEM_PMM_Read_Latency"
},
{
"BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
"MetricExpr": "( ( 64 * imc@event\\=0xe3@ / 1000000000 ) / duration_time )",
- "MetricGroup": "Memory_BW",
+ "MetricGroup": "Memory_BW;SoC;Server",
"MetricName": "PMM_Read_BW"
},
{
"BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
"MetricExpr": "( ( 64 * imc@event\\=0xe7@ / 1000000000 ) / duration_time )",
- "MetricGroup": "Memory_BW",
+ "MetricGroup": "Memory_BW;SoC;Server",
"MetricName": "PMM_Write_BW"
},
{
+ "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]",
+ "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3 ) * 4 / 1000000000 / duration_time",
+ "MetricGroup": "IO_BW;SoC;Server",
+ "MetricName": "IO_Write_BW"
+ },
+ {
+ "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]",
+ "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3 ) * 4 / 1000000000 / duration_time",
+ "MetricGroup": "IO_BW;SoC;Server",
+ "MetricName": "IO_Read_BW"
+ },
+ {
"BriefDescription": "Socket actual clocks when any core is active on that socket",
"MetricExpr": "cha_0@event\\=0x0@",
- "MetricGroup": "",
+ "MetricGroup": "SoC",
"MetricName": "Socket_CLKS"
},
{
- "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions. )",
+ "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
"MetricExpr": "INST_RETIRED.ANY / ( BR_INST_RETIRED.FAR_BRANCH / 2 )",
- "MetricGroup": "",
+ "MetricGroup": "Branches;OS",
"MetricName": "IpFarBranch"
},
{
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json b/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json
index 3553472ad266..0716b2e3ff75 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json
@@ -247,6 +247,30 @@
"UMask": "0x10"
},
{
+ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xAB",
+ "EventName": "DSB2MITE_SWITCHES.COUNT",
+ "PublicDescription": "This event counts the number of the Decode Stream Buffer (DSB)-to-MITE switches including all misses because of missing Decode Stream Buffer (DSB) cache and u-arch forced misses.\nNote: Invoking MITE requires two or three cycles delay.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_1",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x400106",
+ "PEBS": "2",
+ "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
@@ -360,6 +384,16 @@
"UMask": "0x24"
},
{
+ "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xE6",
+ "EventName": "BACLEARS.ANY",
+ "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/memory.json b/tools/perf/pmu-events/arch/x86/cascadelakex/memory.json
index cc66a51c6a7b..0c07cb4fbf58 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/memory.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/memory.json
@@ -1,6 +1,6 @@
[
{
- "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -13,7 +13,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -26,7 +26,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -81,7 +81,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -177,7 +177,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -218,7 +218,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -245,7 +245,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -286,7 +286,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -313,7 +313,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -350,7 +350,7 @@
"UMask": "0x8"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -405,7 +405,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP OCR.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -432,7 +432,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -473,7 +473,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -513,7 +513,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.ANY_SNOOP OCR.ALL_PF_RFO.L3_MISS.ANY_SNOOP OCR.ALL_PF_RFO.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -540,7 +540,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -678,7 +678,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -705,7 +705,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -779,7 +779,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -792,7 +792,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -847,7 +847,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -874,7 +874,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -950,7 +950,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -977,7 +977,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1045,7 +1045,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1058,7 +1058,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1071,7 +1071,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1084,7 +1084,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1097,7 +1097,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1258,7 +1258,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1285,7 +1285,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1298,7 +1298,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1339,7 +1339,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1380,7 +1380,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1393,7 +1393,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1406,7 +1406,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1419,7 +1419,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1432,7 +1432,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1445,7 +1445,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1513,7 +1513,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L1D_AND_SW.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1540,7 +1540,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1619,7 +1619,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1632,7 +1632,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1645,7 +1645,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1672,7 +1672,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.PF_L2_DATA_RD.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1699,7 +1699,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1712,7 +1712,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1725,7 +1725,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.DEMAND_RFO.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1738,7 +1738,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1765,7 +1765,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS.SNOOP_MISS OCR.ALL_READS.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1806,7 +1806,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1833,7 +1833,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1874,7 +1874,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1887,7 +1887,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1942,7 +1942,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1969,7 +1969,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1982,7 +1982,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1995,7 +1995,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2008,7 +2008,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2021,7 +2021,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2048,7 +2048,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2089,7 +2089,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2144,7 +2144,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2185,7 +2185,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2212,7 +2212,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2225,7 +2225,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2241,7 +2241,8 @@
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
- "EventCode": "0xCD",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
"MSRIndex": "0x3F6",
"MSRValue": "0x80",
@@ -2290,7 +2291,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2345,7 +2346,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2386,7 +2387,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2399,7 +2400,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2426,7 +2427,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2439,7 +2440,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2452,7 +2453,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.ANY_SNOOP OCR.ALL_DATA_RD.L3_MISS.ANY_SNOOP OCR.ALL_DATA_RD.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2493,7 +2494,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2520,7 +2521,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2547,7 +2548,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2560,7 +2561,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2586,7 +2587,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2599,7 +2600,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2612,7 +2613,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2653,7 +2654,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2694,7 +2695,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2707,7 +2708,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP OCR.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2730,7 +2731,7 @@
"UMask": "0x40"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2743,7 +2744,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2756,7 +2757,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2769,7 +2770,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2782,7 +2783,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2809,7 +2810,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2822,7 +2823,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2835,7 +2836,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2848,7 +2849,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2875,7 +2876,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2902,7 +2903,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_READS.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2915,7 +2916,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2928,7 +2929,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L2_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2941,7 +2942,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2954,7 +2955,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2981,7 +2982,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3050,7 +3051,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3104,7 +3105,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.HITM_OTHER_CORE OCR.PF_L3_RFO.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3117,7 +3118,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3195,7 +3196,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3222,7 +3223,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3235,7 +3236,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3290,7 +3291,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3373,7 +3374,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3413,7 +3414,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3426,7 +3427,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3481,7 +3482,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3522,7 +3523,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3535,7 +3536,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3548,7 +3549,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3561,7 +3562,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3588,7 +3589,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3601,7 +3602,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3614,7 +3615,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3778,7 +3779,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3791,7 +3792,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3804,7 +3805,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3859,7 +3860,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3886,7 +3887,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3899,7 +3900,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3925,7 +3926,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L2_RFO.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3938,7 +3939,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3979,7 +3980,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L3_RFO.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4044,7 +4045,7 @@
"UMask": "0x20"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.SNOOP_MISS OCR.ALL_PF_RFO.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4071,7 +4072,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.PF_L3_DATA_RD.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4111,7 +4112,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4138,7 +4139,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4151,7 +4152,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP OCR.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4164,7 +4165,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS.ANY_SNOOP OCR.ALL_READS.L3_MISS.ANY_SNOOP OCR.ALL_READS.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4205,7 +4206,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4243,7 +4244,7 @@
"UMask": "0x4"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4284,7 +4285,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4297,7 +4298,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4310,7 +4311,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4323,7 +4324,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4336,7 +4337,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4363,7 +4364,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP OCR.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4390,7 +4391,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4503,7 +4504,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4529,7 +4530,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4556,7 +4557,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4569,7 +4570,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4582,7 +4583,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4623,7 +4624,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.HITM_OTHER_CORE OCR.PF_L2_RFO.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4636,7 +4637,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4649,7 +4650,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_READS.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_READS.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4662,7 +4663,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4685,7 +4686,7 @@
"UMask": "0x80"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4698,7 +4699,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.SNOOP_NONE OCR.ALL_PF_RFO.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4711,7 +4712,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4724,7 +4725,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS.SNOOP_MISS OCR.ALL_RFO.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4765,7 +4766,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4802,7 +4803,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4828,7 +4829,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.DEMAND_DATA_RD.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4841,7 +4842,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4909,7 +4910,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4950,7 +4951,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4977,7 +4978,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4990,21 +4991,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
- "Counter": "0,1,2,3",
- "CounterHTOff": "0,1,2,3",
- "Deprecated": "1",
- "EventCode": "0xB7, 0xBB",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0110000001",
- "Offcore": "1",
- "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
- "SampleAfterValue": "100003",
- "UMask": "0x1"
- },
- {
- "BriefDescription": "ALL_RFO & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5017,7 +5004,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5030,7 +5017,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5043,7 +5030,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5070,7 +5057,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS.REMOTE_HITM OCR.ALL_READS.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5097,7 +5084,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.ANY_SNOOP OCR.PF_L3_RFO.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5152,7 +5139,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5188,7 +5175,7 @@
"UMask": "0x10"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS.REMOTE_HITM OCR.ALL_RFO.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5201,7 +5188,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5214,7 +5201,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5227,7 +5214,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5240,7 +5227,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5253,7 +5240,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5266,7 +5253,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5335,7 +5322,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5390,7 +5377,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5403,7 +5390,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5419,7 +5406,8 @@
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
- "EventCode": "0xCD",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
"MSRIndex": "0x3F6",
"MSRValue": "0x20",
@@ -5444,7 +5432,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5457,7 +5445,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5470,7 +5458,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5510,7 +5498,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5523,7 +5511,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5536,7 +5524,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5549,7 +5537,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5585,7 +5573,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS.SNOOP_NONE OCR.ALL_RFO.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5598,7 +5586,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5667,7 +5655,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5708,7 +5696,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.OTHER.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5747,7 +5735,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5760,7 +5748,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS.SNOOP_NONE OCR.ALL_READS.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5829,7 +5817,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5870,7 +5858,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5925,7 +5913,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5938,7 +5926,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5951,7 +5939,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5964,7 +5952,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5977,7 +5965,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.SNOOP_MISS OCR.ALL_DATA_RD.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6004,7 +5992,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6017,7 +6005,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6030,7 +6018,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6099,7 +6087,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6181,7 +6169,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_MISS.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6222,7 +6210,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6235,7 +6223,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.ANY_SNOOP OCR.PF_L2_RFO.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6262,7 +6250,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6359,7 +6347,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6400,7 +6388,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6413,7 +6401,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6426,7 +6414,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6439,7 +6427,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6452,7 +6440,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.HITM_OTHER_CORE OCR.OTHER.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6496,7 +6484,8 @@
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
- "EventCode": "0xCD",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
"MSRIndex": "0x3F6",
"MSRValue": "0x100",
@@ -6592,7 +6581,8 @@
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
- "EventCode": "0xCD",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
"MSRIndex": "0x3F6",
"MSRValue": "0x10",
@@ -6640,7 +6630,7 @@
"UMask": "0x20"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6694,7 +6684,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6707,7 +6697,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6763,7 +6753,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6776,7 +6766,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6789,7 +6779,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6816,7 +6806,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6843,7 +6833,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6873,7 +6863,8 @@
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
- "EventCode": "0xCD",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
"MSRIndex": "0x3F6",
"MSRValue": "0x200",
@@ -6893,7 +6884,7 @@
"UMask": "0x20"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6962,7 +6953,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6975,7 +6966,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7016,7 +7007,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.NO_SNOOP_NEEDED OCR.DEMAND_CODE_RD.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7029,7 +7020,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.HITM_OTHER_CORE OCR.PF_L1D_AND_SW.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7070,7 +7061,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7083,7 +7074,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.DEMAND_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7096,7 +7087,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7123,7 +7114,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7136,7 +7127,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS.ANY_SNOOP OCR.ALL_RFO.L3_MISS.ANY_SNOOP OCR.ALL_RFO.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7149,7 +7140,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7204,7 +7195,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7217,7 +7208,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7243,7 +7234,8 @@
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
- "EventCode": "0xCD",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
"MSRIndex": "0x3F6",
"MSRValue": "0x40",
@@ -7254,7 +7246,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS.HITM_OTHER_CORE OCR.ALL_RFO.L3_MISS.HITM_OTHER_CORE OCR.ALL_RFO.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7295,7 +7287,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_NONE OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7336,7 +7328,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.NO_SNOOP_NEEDED OCR.OTHER.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7405,7 +7397,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7418,7 +7410,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.ANY_SNOOP OCR.OTHER.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7431,7 +7423,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7444,7 +7436,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7457,7 +7449,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7470,7 +7462,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7483,7 +7475,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7524,7 +7516,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7551,7 +7543,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7564,7 +7556,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7577,7 +7569,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7604,7 +7596,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7617,7 +7609,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.HITM_OTHER_CORE OCR.DEMAND_RFO.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7658,7 +7650,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7681,7 +7673,7 @@
"UMask": "0x2"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7708,7 +7700,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7735,7 +7727,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7762,7 +7754,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7789,7 +7781,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7802,7 +7794,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.SNOOP_NONE OCR.ALL_DATA_RD.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7815,7 +7807,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7828,7 +7820,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7841,7 +7833,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7854,7 +7846,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7895,7 +7887,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7922,7 +7914,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7949,7 +7941,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7976,19 +7968,6 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
- "Counter": "0,1,2,3",
- "CounterHTOff": "0,1,2,3",
- "EventCode": "0xB7, 0xBB",
- "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0810000001",
- "Offcore": "1",
- "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
- "SampleAfterValue": "100003",
- "UMask": "0x1"
- },
- {
"BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
@@ -8003,7 +7982,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8026,7 +8005,7 @@
"UMask": "0x2"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8039,7 +8018,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8070,7 +8049,7 @@
"UMask": "0x10"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.ANY_SNOOP OCR.DEMAND_RFO.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8083,7 +8062,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8236,7 +8215,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED OCR.ALL_READS.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8249,7 +8228,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8262,7 +8241,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8289,7 +8268,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8344,7 +8323,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.REMOTE_HITM OCR.ALL_DATA_RD.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8357,7 +8336,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8426,7 +8405,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8439,7 +8418,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8452,7 +8431,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8520,7 +8499,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8630,7 +8609,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L3_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8713,7 +8692,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8726,7 +8705,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8766,7 +8745,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8803,7 +8782,7 @@
"UMask": "0x10"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8816,7 +8795,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8857,7 +8836,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8968,7 +8947,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8981,7 +8960,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.HITM_OTHER_CORE OCR.DEMAND_CODE_RD.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8994,7 +8973,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9021,7 +9000,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9047,7 +9026,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9060,7 +9039,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9073,7 +9052,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9169,6 +9148,19 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0810000001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
@@ -9261,7 +9253,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9316,7 +9308,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9329,7 +9321,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9356,7 +9348,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP OCR.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9383,7 +9375,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9424,7 +9416,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9465,7 +9457,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9506,7 +9498,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9533,7 +9525,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9546,7 +9538,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9601,7 +9593,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9614,7 +9606,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9641,7 +9633,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9678,7 +9670,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9705,7 +9697,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9718,7 +9710,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9745,7 +9737,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS.HITM_OTHER_CORE OCR.ALL_READS.L3_MISS.HITM_OTHER_CORE OCR.ALL_READS.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9772,7 +9764,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9785,7 +9777,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.HIT_OTHER_CORE_FWD OCR.OTHER.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9812,7 +9804,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.REMOTE_HITM OCR.ALL_PF_RFO.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9839,7 +9831,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9855,7 +9847,8 @@
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
- "EventCode": "0xCD",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
"MSRIndex": "0x3F6",
"MSRValue": "0x4",
@@ -9883,7 +9876,8 @@
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
- "EventCode": "0xCD",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
"MSRIndex": "0x3F6",
"MSRValue": "0x8",
@@ -9894,7 +9888,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9905,5 +9899,19 @@
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
"UMask": "0x1"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
+ "Deprecated": "1",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0110000001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
}
] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/other.json b/tools/perf/pmu-events/arch/x86/cascadelakex/other.json
index 05d13d53c374..f77d78e90954 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/other.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/other.json
@@ -1,6 +1,6 @@
[
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -13,7 +13,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -26,7 +26,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_F.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -39,7 +39,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -52,7 +52,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -65,7 +65,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -78,7 +78,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -104,7 +104,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -117,7 +117,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -130,7 +130,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "Counts any other requests OCR.OTHER.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -143,7 +143,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -156,7 +156,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -169,7 +169,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -182,7 +182,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -195,7 +195,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -208,7 +208,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -221,7 +221,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -234,7 +234,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -247,7 +247,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -260,7 +260,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -273,7 +273,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -299,7 +299,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -312,7 +312,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -325,7 +325,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_M.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -338,7 +338,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -351,7 +351,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -364,7 +364,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -377,7 +377,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -390,7 +390,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED OCR.ALL_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -403,7 +403,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -416,7 +416,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "Counts any other requests OCR.OTHER.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -429,7 +429,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -442,7 +442,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_READS.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_READS.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -478,7 +478,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE OCR.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -491,7 +491,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -517,7 +517,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -530,7 +530,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -543,7 +543,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.OTHER.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -556,7 +556,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -582,7 +582,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -595,7 +595,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_F & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_F.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -621,7 +621,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -634,7 +634,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -660,7 +660,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -673,7 +673,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -699,7 +699,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -712,7 +712,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -738,7 +738,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -751,7 +751,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT.SNOOP_MISS OCR.ALL_RFO.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -764,7 +764,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -777,7 +777,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -790,7 +790,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -803,7 +803,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -816,7 +816,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -829,7 +829,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -842,7 +842,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -855,7 +855,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -868,7 +868,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_M & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -881,7 +881,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -894,7 +894,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -946,7 +946,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -985,7 +985,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -998,7 +998,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1011,7 +1011,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1024,7 +1024,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1050,7 +1050,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1063,7 +1063,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1076,7 +1076,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1089,7 +1089,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE OCR.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1102,7 +1102,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1115,7 +1115,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1128,7 +1128,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1141,7 +1141,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1167,7 +1167,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1180,7 +1180,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.ANY_SNOOP OCR.DEMAND_RFO.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1193,7 +1193,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1206,7 +1206,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1219,7 +1219,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1232,7 +1232,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1245,7 +1245,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1258,7 +1258,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1297,7 +1297,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1310,7 +1310,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & ANY_RESPONSE have any response type.",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.ANY_RESPONSE have any response type.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1323,7 +1323,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1336,7 +1336,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.SNOOP_MISS OCR.ALL_PF_RFO.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1349,7 +1349,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1362,7 +1362,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1388,7 +1388,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_S & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1401,7 +1401,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.ANY_SNOOP OCR.ALL_RFO.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1414,7 +1414,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1427,7 +1427,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1440,7 +1440,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_E.ANY_SNOOP OCR.ALL_READS.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1453,7 +1453,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1466,7 +1466,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1479,7 +1479,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1492,7 +1492,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_FWD OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1505,7 +1505,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1518,7 +1518,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1531,7 +1531,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1544,7 +1544,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1557,7 +1557,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1570,7 +1570,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1583,7 +1583,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1596,7 +1596,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1609,7 +1609,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1622,7 +1622,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1635,7 +1635,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1661,7 +1661,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & ANY_RESPONSE have any response type.",
+ "BriefDescription": "OCR.ALL_RFO.ANY_RESPONSE have any response type.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1726,7 +1726,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1739,7 +1739,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1752,7 +1752,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.ANY_SNOOP OCR.PF_L2_RFO.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1765,7 +1765,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1778,7 +1778,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1791,7 +1791,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1804,7 +1804,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1817,7 +1817,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT.SNOOP_MISS OCR.ALL_READS.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1830,7 +1830,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1843,7 +1843,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1856,7 +1856,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1908,7 +1908,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1921,7 +1921,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1934,7 +1934,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1947,7 +1947,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1960,7 +1960,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.ANY_SNOOP OCR.ALL_READS.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1973,7 +1973,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1986,7 +1986,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1999,7 +1999,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2012,7 +2012,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2025,7 +2025,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2038,7 +2038,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & SUPPLIER_NONE & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2051,7 +2051,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2077,7 +2077,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2090,7 +2090,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2103,7 +2103,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_E & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_E.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2116,7 +2116,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2129,7 +2129,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_M & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2142,7 +2142,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2155,7 +2155,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2168,7 +2168,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2194,7 +2194,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2207,7 +2207,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2220,7 +2220,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2233,7 +2233,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2246,7 +2246,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2259,7 +2259,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2272,7 +2272,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2285,7 +2285,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2337,7 +2337,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2350,7 +2350,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2363,7 +2363,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2376,7 +2376,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2389,7 +2389,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2402,7 +2402,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2428,7 +2428,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2441,7 +2441,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2454,7 +2454,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2490,7 +2490,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2503,7 +2503,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2516,7 +2516,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2529,7 +2529,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2542,7 +2542,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2555,7 +2555,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2568,7 +2568,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2581,7 +2581,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2594,7 +2594,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_F & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2607,7 +2607,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2620,7 +2620,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & ANY_RESPONSE have any response type.",
+ "BriefDescription": "OCR.ALL_PF_RFO.ANY_RESPONSE have any response type.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2633,7 +2633,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2646,7 +2646,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2659,7 +2659,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2672,7 +2672,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2685,7 +2685,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_M & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_M.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2698,7 +2698,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2711,7 +2711,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2724,7 +2724,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2747,7 +2747,7 @@
"UMask": "0x40"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE OCR.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2773,7 +2773,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2799,7 +2799,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2812,7 +2812,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_M & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2825,7 +2825,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2838,7 +2838,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2851,7 +2851,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2877,7 +2877,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2890,7 +2890,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2916,7 +2916,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2929,7 +2929,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2942,7 +2942,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2955,7 +2955,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2968,7 +2968,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2994,7 +2994,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3020,7 +3020,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3046,7 +3046,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3059,7 +3059,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3072,7 +3072,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3085,7 +3085,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3098,7 +3098,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3111,7 +3111,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_E & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3124,7 +3124,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3137,7 +3137,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & ANY_RESPONSE have any response type.",
+ "BriefDescription": "OCR.ALL_DATA_RD.ANY_RESPONSE have any response type.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3150,7 +3150,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3163,7 +3163,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_S & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3189,7 +3189,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3202,7 +3202,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3215,7 +3215,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3241,7 +3241,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_F & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3254,7 +3254,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3267,7 +3267,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3280,7 +3280,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.HIT_OTHER_CORE_FWD OCR.OTHER.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3306,7 +3306,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3319,7 +3319,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3332,7 +3332,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3345,7 +3345,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3358,7 +3358,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3371,7 +3371,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3384,7 +3384,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3397,7 +3397,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3423,7 +3423,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3436,7 +3436,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3449,7 +3449,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3462,7 +3462,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3475,7 +3475,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3488,7 +3488,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3501,7 +3501,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3514,7 +3514,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3527,7 +3527,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3540,7 +3540,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3553,7 +3553,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3566,7 +3566,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3579,7 +3579,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3592,7 +3592,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3605,7 +3605,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3618,7 +3618,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3631,7 +3631,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3644,7 +3644,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_F.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3657,7 +3657,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3670,7 +3670,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3696,7 +3696,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3709,7 +3709,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3735,7 +3735,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3748,7 +3748,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3771,7 +3771,7 @@
"UMask": "0x2"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3797,7 +3797,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3810,7 +3810,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3823,7 +3823,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3836,7 +3836,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3849,7 +3849,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_F & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3875,7 +3875,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3888,7 +3888,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3914,7 +3914,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3927,7 +3927,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3940,7 +3940,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3953,7 +3953,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3966,7 +3966,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3979,7 +3979,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3992,7 +3992,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4005,7 +4005,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4018,7 +4018,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4031,7 +4031,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts any other requests OCR.OTHER.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4044,7 +4044,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4057,7 +4057,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "Counts any other requests OCR.OTHER.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4070,7 +4070,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4092,7 +4092,7 @@
"UMask": "0x4"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4118,7 +4118,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4131,7 +4131,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4144,7 +4144,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4157,7 +4157,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4170,7 +4170,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_F & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4196,7 +4196,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4209,7 +4209,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4235,7 +4235,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.ANY_SNOOP OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4248,7 +4248,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4261,7 +4261,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.HITM_OTHER_CORE OCR.ALL_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4274,7 +4274,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4287,7 +4287,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.ANY_SNOOP OCR.ALL_RFO.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4300,7 +4300,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4313,7 +4313,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.ANY_SNOOP OCR.ALL_RFO.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4326,7 +4326,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts any other requests OCR.OTHER.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4339,7 +4339,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4352,7 +4352,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4365,7 +4365,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4378,7 +4378,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4391,7 +4391,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4404,7 +4404,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4417,7 +4417,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4430,7 +4430,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_E.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4443,7 +4443,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4469,7 +4469,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4495,7 +4495,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4521,7 +4521,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4534,7 +4534,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4560,7 +4560,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4573,7 +4573,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4586,7 +4586,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT.ANY_SNOOP OCR.ALL_READS.L3_HIT.ANY_SNOOP OCR.ALL_READS.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4599,7 +4599,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4612,7 +4612,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4625,7 +4625,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4638,7 +4638,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4651,7 +4651,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4677,7 +4677,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT & SNOOP_HIT_WITH_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4690,7 +4690,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4716,7 +4716,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT.SNOOP_NONE OCR.ALL_READS.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4729,7 +4729,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4742,7 +4742,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4755,7 +4755,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "Counts any other requests OCR.OTHER.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4768,7 +4768,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4781,7 +4781,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4794,7 +4794,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_S.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4807,7 +4807,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4820,7 +4820,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT & SNOOP_HIT_WITH_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4833,7 +4833,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_M.ANY_SNOOP OCR.ALL_READS.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4846,7 +4846,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4859,7 +4859,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & ANY_RESPONSE have any response type.",
+ "BriefDescription": "OCR.ALL_READS.ANY_RESPONSE have any response type.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4872,7 +4872,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4885,7 +4885,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4898,7 +4898,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4911,7 +4911,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4924,7 +4924,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4937,7 +4937,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4950,7 +4950,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4963,7 +4963,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4976,7 +4976,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5015,7 +5015,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5041,7 +5041,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_MISS OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5054,7 +5054,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5067,7 +5067,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5080,7 +5080,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_E & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5093,7 +5093,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5106,7 +5106,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_S.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5119,7 +5119,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_F.ANY_SNOOP OCR.ALL_READS.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5132,7 +5132,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5145,7 +5145,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_S & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_S.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5158,7 +5158,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5171,7 +5171,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5184,7 +5184,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5197,7 +5197,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5210,7 +5210,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5236,7 +5236,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_S & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5249,7 +5249,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5262,7 +5262,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP OCR.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5275,7 +5275,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT.SNOOP_NONE OCR.ALL_RFO.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5288,7 +5288,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5301,7 +5301,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5314,7 +5314,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5327,7 +5327,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5340,7 +5340,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_E & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_E.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5353,7 +5353,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5366,7 +5366,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & SUPPLIER_NONE & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5379,7 +5379,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5401,7 +5401,7 @@
"UMask": "0x2"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5414,7 +5414,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5427,7 +5427,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5440,7 +5440,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_M & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_M.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5476,7 +5476,7 @@
"UMask": "0x20"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE OCR.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5489,7 +5489,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5502,7 +5502,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5515,7 +5515,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5528,7 +5528,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5541,7 +5541,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5554,7 +5554,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5567,7 +5567,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5580,7 +5580,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_F & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5593,7 +5593,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5606,7 +5606,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5619,7 +5619,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5645,7 +5645,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.NO_SNOOP_NEEDED OCR.ALL_READS.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5658,7 +5658,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5671,7 +5671,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5684,7 +5684,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_F & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_F.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5697,7 +5697,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP OCR.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5710,7 +5710,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5723,7 +5723,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5736,7 +5736,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5749,7 +5749,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5762,7 +5762,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5775,7 +5775,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5788,7 +5788,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5801,7 +5801,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5814,7 +5814,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5827,7 +5827,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5840,7 +5840,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.SNOOP_NONE OCR.ALL_DATA_RD.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5853,7 +5853,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5866,7 +5866,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_S & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5879,7 +5879,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5905,7 +5905,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5918,7 +5918,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.ANY_SNOOP OCR.PF_L3_RFO.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5931,7 +5931,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_M & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5957,7 +5957,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5970,7 +5970,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5983,7 +5983,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5996,7 +5996,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_E & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6009,7 +6009,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6022,7 +6022,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6035,7 +6035,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6048,7 +6048,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6074,7 +6074,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6100,7 +6100,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6113,7 +6113,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6139,7 +6139,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6152,7 +6152,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6165,7 +6165,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6191,7 +6191,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6204,7 +6204,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6217,7 +6217,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6230,7 +6230,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6243,7 +6243,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6256,7 +6256,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6269,7 +6269,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.ANY_SNOOP OCR.OTHER.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6282,7 +6282,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6295,7 +6295,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_S & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6308,7 +6308,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6321,7 +6321,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_E.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6347,7 +6347,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6360,7 +6360,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6373,7 +6373,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6386,7 +6386,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6412,7 +6412,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP OCR.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6438,7 +6438,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6451,7 +6451,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT & SNOOP_HIT_WITH_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6464,7 +6464,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.HITM_OTHER_CORE OCR.ALL_PF_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6477,7 +6477,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6490,7 +6490,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6529,7 +6529,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6542,7 +6542,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6555,7 +6555,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6568,7 +6568,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED OCR.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6620,7 +6620,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6633,7 +6633,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_M.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6659,7 +6659,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.HITM_OTHER_CORE OCR.ALL_READS.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6672,7 +6672,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6685,7 +6685,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6698,7 +6698,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6711,7 +6711,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6763,7 +6763,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.NO_SNOOP_NEEDED OCR.OTHER.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6776,7 +6776,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6789,7 +6789,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6815,7 +6815,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT.ANY_SNOOP OCR.ALL_RFO.L3_HIT.ANY_SNOOP OCR.ALL_RFO.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6828,7 +6828,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6841,7 +6841,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6867,7 +6867,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6880,7 +6880,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.ANY_SNOOP OCR.ALL_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6893,7 +6893,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_M & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6906,7 +6906,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6919,7 +6919,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6958,7 +6958,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.ANY_SNOOP OCR.ALL_RFO.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6971,7 +6971,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6984,7 +6984,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6997,7 +6997,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7019,7 +7019,7 @@
"UMask": "0x8"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7032,7 +7032,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7045,7 +7045,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7058,7 +7058,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7071,7 +7071,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_M & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7084,7 +7084,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7097,7 +7097,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7110,7 +7110,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP OCR.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7123,7 +7123,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7136,7 +7136,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_E & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7149,7 +7149,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7162,7 +7162,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7175,7 +7175,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7188,7 +7188,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7201,7 +7201,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7214,7 +7214,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7227,7 +7227,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7240,7 +7240,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7253,7 +7253,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7276,7 +7276,7 @@
"UMask": "0x18"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7289,7 +7289,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7315,7 +7315,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7328,7 +7328,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7341,7 +7341,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7354,7 +7354,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7367,7 +7367,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7380,7 +7380,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7393,7 +7393,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.ANY_SNOOP OCR.ALL_RFO.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7406,7 +7406,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7419,7 +7419,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7432,7 +7432,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7445,7 +7445,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7458,7 +7458,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.SNOOP_NONE OCR.ALL_PF_RFO.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7520,7 +7520,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE OCR.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7533,7 +7533,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7546,7 +7546,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7572,7 +7572,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7585,7 +7585,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7598,7 +7598,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7611,7 +7611,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7637,7 +7637,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7650,7 +7650,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7676,7 +7676,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7689,7 +7689,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7702,7 +7702,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7728,7 +7728,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_E & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7741,7 +7741,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7754,7 +7754,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7767,7 +7767,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7780,7 +7780,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.SNOOP_MISS OCR.ALL_DATA_RD.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7793,7 +7793,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7806,7 +7806,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP OCR.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7832,7 +7832,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_NONE OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7845,7 +7845,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.ANY_SNOOP OCR.ALL_PF_RFO.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7858,7 +7858,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7884,7 +7884,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7897,7 +7897,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7910,7 +7910,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7949,7 +7949,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7962,7 +7962,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7975,7 +7975,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8001,7 +8001,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8014,7 +8014,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8027,7 +8027,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8053,7 +8053,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8092,7 +8092,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8105,7 +8105,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8118,7 +8118,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8131,7 +8131,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8144,7 +8144,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8157,7 +8157,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8170,7 +8170,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8183,7 +8183,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.HITM_OTHER_CORE OCR.OTHER.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8196,7 +8196,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8222,7 +8222,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8235,7 +8235,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8257,7 +8257,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8270,7 +8270,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8283,7 +8283,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_S & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8309,7 +8309,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8322,7 +8322,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8348,7 +8348,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_E & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8361,7 +8361,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8374,7 +8374,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8387,7 +8387,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_F & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8400,7 +8400,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8413,7 +8413,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8426,7 +8426,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8439,7 +8439,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE OCR.ALL_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8452,7 +8452,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8465,7 +8465,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8478,7 +8478,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_S.ANY_SNOOP OCR.ALL_READS.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8504,7 +8504,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8517,7 +8517,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8543,7 +8543,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8556,7 +8556,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8569,7 +8569,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8582,7 +8582,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8595,7 +8595,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8608,7 +8608,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8621,7 +8621,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8634,7 +8634,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8647,7 +8647,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_S & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_S.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json b/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json
index 5ec668f46ac1..023f31c72a42 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json
@@ -827,16 +827,6 @@
"UMask": "0x2"
},
{
- "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
- "Counter": "0,1,2,3",
- "CounterHTOff": "0,1,2,3,4,5,6,7",
- "EventCode": "0xE6",
- "EventName": "BACLEARS.ANY",
- "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
- "SampleAfterValue": "100003",
- "UMask": "0x1"
- },
- {
"BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json
index 3fb5cdce842f..4ba9e6d9f25e 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json
@@ -90,32 +90,32 @@
"Unit": "iMC"
},
{
- "BriefDescription": "Intel Optane DC persistent memory bandwidth read (MB/sec). Derived from unc_m_pmm_rpq_inserts",
+ "BriefDescription": "Intel Optane DC persistent memory bandwidth read (MB). Derived from unc_m_pmm_rpq_inserts",
"Counter": "0,1,2,3",
"EventCode": "0xE3",
"EventName": "UNC_M_PMM_BANDWIDTH.READ",
"PerPkg": "1",
- "ScaleUnit": "6.103515625E-5MB/sec",
+ "ScaleUnit": "6.103515625E-5MB",
"Unit": "iMC"
},
{
- "BriefDescription": "Intel Optane DC persistent memory bandwidth write (MB/sec). Derived from unc_m_pmm_wpq_inserts",
+ "BriefDescription": "Intel Optane DC persistent memory bandwidth write (MB). Derived from unc_m_pmm_wpq_inserts",
"Counter": "0,1,2,3",
"EventCode": "0xE7",
"EventName": "UNC_M_PMM_BANDWIDTH.WRITE",
"PerPkg": "1",
- "ScaleUnit": "6.103515625E-5MB/sec",
+ "ScaleUnit": "6.103515625E-5MB",
"Unit": "iMC"
},
{
- "BriefDescription": "Intel Optane DC persistent memory bandwidth total (MB/sec). Derived from unc_m_pmm_rpq_inserts",
+ "BriefDescription": "Intel Optane DC persistent memory bandwidth total (MB). Derived from unc_m_pmm_rpq_inserts",
"Counter": "0,1,2,3",
"EventCode": "0xE3",
"EventName": "UNC_M_PMM_BANDWIDTH.TOTAL",
"MetricExpr": "UNC_M_PMM_RPQ_INSERTS + UNC_M_PMM_WPQ_INSERTS",
"MetricName": "UNC_M_PMM_BANDWIDTH.TOTAL",
"PerPkg": "1",
- "ScaleUnit": "6.103515625E-5MB/sec",
+ "ScaleUnit": "6.103515625E-5MB",
"Unit": "iMC"
},
{
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-other.json b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-other.json
index df355ba7acc8..0cd083839e75 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-other.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-other.json
@@ -538,6 +538,27 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "TOR Inserts : DRds issued by iA Cores that Missed the LLC",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD",
+ "Filter": "config1=0x40433",
+ "PerPkg": "1",
+ "PublicDescription": "TOR Inserts : DRds issued by iA Cores that Missed the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0x21",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy : DRds issued by iA Cores that Missed the LLC",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD",
+ "Filter": "config1=0x40433",
+ "PerPkg": "1",
+ "PublicDescription": "TOR Occupancy : DRds issued by iA Cores that Missed the LLC : For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0x21",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "Clockticks of the IIO Traffic Controller",
"Counter": "0,1,2,3",
"EventCode": "0x1",
diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv
index 25b06cf98747..2f2a209e87e1 100644
--- a/tools/perf/pmu-events/arch/x86/mapfile.csv
+++ b/tools/perf/pmu-events/arch/x86/mapfile.csv
@@ -38,3 +38,4 @@ GenuineIntel-6-7E,v1,icelake,core
GenuineIntel-6-86,v1,tremontx,core
AuthenticAMD-23-([12][0-9A-F]|[0-9A-F]),v2,amdzen1,core
AuthenticAMD-23-[[:xdigit:]]+,v1,amdzen2,core
+AuthenticAMD-25-[[:xdigit:]]+,v1,amdzen2,core
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/cache.json b/tools/perf/pmu-events/arch/x86/skylakex/cache.json
index 24df183693fa..e750a21976f1 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/cache.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/cache.json
@@ -1,1663 +1,1675 @@
[
{
- "EventCode": "0x24",
- "UMask": "0x21",
- "BriefDescription": "Demand Data Read miss L2, no rejects",
+ "BriefDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
- "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x24",
- "UMask": "0x22",
- "BriefDescription": "RFO requests that miss L2 cache",
+ "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.RFO_MISS",
- "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xB0",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+ "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
},
{
- "EventCode": "0x24",
- "UMask": "0x24",
- "BriefDescription": "L2 cache misses when fetching instructions",
+ "BriefDescription": "Counts all demand code reads that have any response type.",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.CODE_RD_MISS",
- "PublicDescription": "Counts L2 cache misses when fetching instructions.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0x24",
- "UMask": "0x27",
+ "EventName": "L2_RQSTS.PF_MISS",
+ "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x38"
+ },
+ {
"BriefDescription": "Demand requests that miss L2 cache",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_MISS",
"PublicDescription": "Demand requests that miss L2 cache.",
"SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x27"
},
{
- "EventCode": "0x24",
- "UMask": "0x38",
- "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache",
+ "BriefDescription": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.PF_MISS",
- "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x08003C0002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x24",
- "UMask": "0x3f",
- "BriefDescription": "All requests that miss L2 cache",
+ "BriefDescription": "Retired load instructions with L3 cache hits as data sources",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.MISS",
- "PublicDescription": "All requests that miss L2 cache.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD1",
+ "EventName": "MEM_LOAD_RETIRED.L3_HIT",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache.",
+ "SampleAfterValue": "50021",
+ "UMask": "0x4"
},
{
- "EventCode": "0x24",
- "UMask": "0xc1",
- "BriefDescription": "Demand Data Read requests that hit L2 cache",
+ "BriefDescription": "L2 writebacks that access L2 cache",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
- "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xF0",
+ "EventName": "L2_TRANS.L2_WB",
+ "PublicDescription": "Counts L2 writebacks that access L2 cache.",
"SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x40"
},
{
- "EventCode": "0x24",
- "UMask": "0xc2",
- "BriefDescription": "RFO requests that hit L2 cache",
+ "BriefDescription": "L2 cache lines filling L2",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.RFO_HIT",
- "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xF1",
+ "EventName": "L2_LINES_IN.ALL",
+ "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1f"
},
{
- "EventCode": "0x24",
- "UMask": "0xc4",
- "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.CODE_RD_HIT",
- "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x24",
- "UMask": "0xd8",
- "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache",
+ "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.PF_HIT",
- "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x24",
- "UMask": "0xe1",
- "BriefDescription": "Demand Data Read requests",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3.",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
- "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x24",
- "UMask": "0xe2",
- "BriefDescription": "RFO requests to L2 cache",
+ "BriefDescription": "Demand Data Read requests sent to uncore",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.ALL_RFO",
- "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xB0",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+ "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x24",
- "UMask": "0xe4",
- "BriefDescription": "L2 code requests",
+ "BriefDescription": "Retired load instructions missed L3 cache as data sources",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.ALL_CODE_RD",
- "PublicDescription": "Counts the total number of L2 code requests.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD1",
+ "EventName": "MEM_LOAD_RETIRED.L3_MISS",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x20"
},
{
- "EventCode": "0x24",
- "UMask": "0xe7",
- "BriefDescription": "Demand requests to L2 cache",
+ "BriefDescription": "All retired store instructions.",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
- "PublicDescription": "Demand requests to L2 cache.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD0",
+ "EventName": "MEM_INST_RETIRED.ALL_STORES",
+ "L1_Hit_Indication": "1",
+ "PEBS": "1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x82"
},
{
- "EventCode": "0x24",
- "UMask": "0xf8",
- "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches",
+ "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared state. A non-threaded event.",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.ALL_PF",
- "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xF2",
+ "EventName": "L2_LINES_OUT.SILENT",
"SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x24",
- "UMask": "0xff",
- "BriefDescription": "All L2 requests",
+ "BriefDescription": "Counts all prefetch data reads that hit in the L3.",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.REFERENCES",
- "PublicDescription": "All L2 requests.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x2E",
- "UMask": "0x41",
- "BriefDescription": "Core-originated cacheable demand requests missed L3",
+ "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "LONGEST_LAT_CACHE.MISS",
- "Errata": "SKL057",
- "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x2E",
- "UMask": "0x4f",
- "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
+ "BriefDescription": "Core-originated cacheable demand requests missed L3",
"Counter": "0,1,2,3",
- "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"Errata": "SKL057",
- "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all accesses to the L3.",
+ "EventCode": "0x2E",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x41"
},
{
- "EventCode": "0x48",
- "UMask": "0x1",
- "BriefDescription": "Cycles with L1D load Misses outstanding.",
+ "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches",
"Counter": "0,1,2,3",
- "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
- "CounterMask": "1",
- "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_PF",
+ "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xf8"
},
{
- "EventCode": "0x48",
- "UMask": "0x1",
- "BriefDescription": "L1D miss outstandings duration in cycles",
+ "BriefDescription": "Retired load instructions whose data sources was remote HITM",
"Counter": "0,1,2,3",
- "EventName": "L1D_PEND_MISS.PENDING",
- "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD3",
+ "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM",
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions whose data sources was remote HITM.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x4"
},
{
- "EventCode": "0x48",
- "UMask": "0x1",
- "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
+ "BriefDescription": "Counts all prefetch data reads that have any response type.",
"Counter": "0,1,2,3",
- "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
- "AnyThread": "1",
- "CounterMask": "1",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x48",
- "UMask": "0x2",
- "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.",
+ "BriefDescription": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
- "EventName": "L1D_PEND_MISS.FB_FULL",
- "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x08003C0122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x51",
- "UMask": "0x1",
- "BriefDescription": "L1D data line replacements",
+ "BriefDescription": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
- "EventName": "L1D.REPLACEMENT",
- "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x08003C0100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x60",
- "UMask": "0x1",
- "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+ "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
- "CounterMask": "1",
- "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+ "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x4"
},
{
- "EventCode": "0x60",
- "UMask": "0x1",
- "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
- "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x60",
- "UMask": "0x1",
- "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+ "BriefDescription": "RFO requests that miss L2 cache",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
- "CounterMask": "6",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.RFO_MISS",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x22"
},
{
- "EventCode": "0x60",
- "UMask": "0x2",
- "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.",
+ "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. Clean lines may either be allocated in L3 or dropped",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
- "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xF2",
+ "EventName": "L2_LINES_OUT.NON_SILENT",
+ "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. Clean lines may either be allocated in L3 or dropped.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2"
},
{
- "EventCode": "0x60",
- "UMask": "0x2",
- "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.",
+ "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
- "CounterMask": "1",
- "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xF2",
+ "EventName": "L2_LINES_OUT.USELESS_HWPF",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4"
},
{
- "EventCode": "0x60",
- "UMask": "0x4",
- "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+ "BriefDescription": "Counts all demand data writes (RFOs) that have any response type.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
- "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x60",
- "UMask": "0x4",
- "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.",
+ "BriefDescription": "All requests that miss L2 cache",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
- "CounterMask": "1",
- "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.MISS",
+ "PublicDescription": "All requests that miss L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x3f"
},
{
- "EventCode": "0x60",
- "UMask": "0x8",
- "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+ "BriefDescription": "L2 code requests",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "CounterMask": "1",
- "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_CODE_RD",
+ "PublicDescription": "Counts the total number of L2 code requests.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xe4"
},
{
- "EventCode": "0x60",
- "UMask": "0x8",
- "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
+ "BriefDescription": "Retired load instructions whose data sources was forwarded from a remote cache",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
- "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD3",
+ "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD",
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions whose data sources was forwarded from a remote cache.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x8"
},
{
- "EventCode": "0xB0",
- "UMask": "0x1",
- "BriefDescription": "Demand Data Read requests sent to uncore",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
- "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xB0",
- "UMask": "0x2",
- "BriefDescription": "Cacheable and noncachaeble code read requests",
+ "BriefDescription": "Counts all demand & prefetch data reads that have any response type.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
- "PublicDescription": "Counts both cacheable and non-cacheable code read requests.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xB0",
- "UMask": "0x4",
- "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
+ "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
- "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT",
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.",
+ "SampleAfterValue": "20011",
+ "UMask": "0x2"
},
{
- "EventCode": "0xB0",
- "UMask": "0x8",
- "BriefDescription": "Demand and prefetch data reads",
+ "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from remote dram",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
- "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD3",
+ "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM",
+ "PEBS": "1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x2"
},
{
- "EventCode": "0xB0",
- "UMask": "0x80",
- "BriefDescription": "Any memory transaction that reached the SQ.",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
- "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xB2",
- "UMask": "0x1",
- "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.",
+ "BriefDescription": "Retired load instructions missed L1 cache as data sources",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
- "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.Note: Writeback pending FIFO has six entries.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD1",
+ "EventName": "MEM_LOAD_RETIRED.L1_MISS",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8"
},
{
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0020",
+ "Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "EventCode": "0xD0",
- "UMask": "0x11",
- "BriefDescription": "Retired load instructions that miss the STLB. (Precise Event)",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
- "PublicDescription": "Retired load instructions that miss the STLB.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "EventCode": "0xD0",
- "UMask": "0x12",
- "BriefDescription": "Retired store instructions that miss the STLB. (Precise Event)",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "L2 cache misses when fetching instructions",
"Counter": "0,1,2,3",
- "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
- "PublicDescription": "Retired store instructions that miss the STLB.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.CODE_RD_MISS",
+ "PublicDescription": "Counts L2 cache misses when fetching instructions.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x24"
+ },
+ {
+ "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "L1_Hit_Indication": "1",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "EventCode": "0xD0",
- "UMask": "0x21",
- "BriefDescription": "Retired load instructions with locked access. (Precise Event)",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
"Counter": "0,1,2,3",
- "EventName": "MEM_INST_RETIRED.LOCK_LOADS",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+ "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xD0",
- "UMask": "0x41",
- "BriefDescription": "Retired load instructions that split across a cacheline boundary. (Precise Event)",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts demand data reads that hit in the L3.",
"Counter": "0,1,2,3",
- "EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "EventCode": "0xD0",
- "UMask": "0x42",
- "BriefDescription": "Retired store instructions that split across a cacheline boundary. (Precise Event)",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
"Counter": "0,1,2,3",
- "EventName": "MEM_INST_RETIRED.SPLIT_STORES",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "L1_Hit_Indication": "1",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "EventCode": "0xD0",
- "UMask": "0x81",
- "BriefDescription": "All retired load instructions. (Precise Event)",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "MEM_INST_RETIRED.ALL_LOADS",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xD0",
- "UMask": "0x82",
- "BriefDescription": "All retired store instructions. (Precise Event)",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.",
"Counter": "0,1,2,3",
- "EventName": "MEM_INST_RETIRED.ALL_STORES",
- "PublicDescription": "All retired store instructions.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "2000003",
- "L1_Hit_Indication": "1",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x2"
},
{
- "EventCode": "0xD1",
- "UMask": "0x1",
- "BriefDescription": "Retired load instructions with L1 cache hits as data sources",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Demand requests to L2 cache",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_RETIRED.L1_HIT",
- "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
+ "PublicDescription": "Demand requests to L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xe7"
},
{
- "EventCode": "0xD1",
- "UMask": "0x2",
- "BriefDescription": "Retired load instructions with L2 cache hits as data sources",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3.",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_RETIRED.L2_HIT",
- "PublicDescription": "Retired load instructions with L2 cache hits as data sources.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "EventCode": "0xD1",
- "UMask": "0x4",
- "BriefDescription": "Retired load instructions with L3 cache hits as data sources",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_RETIRED.L3_HIT",
- "PublicDescription": "Retired load instructions with L3 cache hits as data sources.",
- "SampleAfterValue": "50021",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xD1",
- "UMask": "0x8",
- "BriefDescription": "Retired load instructions missed L1 cache as data sources",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type.",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_RETIRED.L1_MISS",
- "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "EventCode": "0xD1",
- "UMask": "0x10",
- "BriefDescription": "Retired load instructions missed L2 cache as data sources",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3.",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_RETIRED.L2_MISS",
- "PublicDescription": "Retired load instructions missed L2 cache as data sources.",
- "SampleAfterValue": "50021",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xD1",
- "UMask": "0x20",
- "BriefDescription": "Retired load instructions missed L3 cache as data sources",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_RETIRED.L3_MISS",
- "PublicDescription": "Retired load instructions missed L3 cache as data sources.",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x08003C0001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xD1",
- "UMask": "0x40",
- "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_RETIRED.FB_HIT",
- "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xD2",
- "UMask": "0x1",
- "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
- "Data_LA": "1",
- "PEBS": "1",
+ "AnyThread": "1",
+ "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
- "SampleAfterValue": "20011",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xD2",
- "UMask": "0x2",
- "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT",
- "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.",
- "SampleAfterValue": "20011",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xD2",
- "UMask": "0x4",
- "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
- "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.",
- "SampleAfterValue": "20011",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "Errata": "SKL057",
+ "EventCode": "0x2E",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all accesses to the L3.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4f"
},
{
- "EventCode": "0xD2",
- "UMask": "0x8",
- "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type.",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE",
- "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "EventCode": "0xD3",
- "UMask": "0x1",
- "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x08003C0120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xD3",
- "UMask": "0x2",
- "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from remote dram",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3.",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xD3",
- "UMask": "0x4",
- "BriefDescription": "Retired load instructions whose data sources was remote HITM",
+ "BriefDescription": "Retired load instructions that miss the STLB.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
"Data_LA": "1",
+ "EventCode": "0xD0",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
"PEBS": "1",
- "Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "SampleAfterValue": "100003",
+ "UMask": "0x11"
},
{
- "EventCode": "0xD3",
- "UMask": "0x8",
- "BriefDescription": "Retired load instructions whose data sources was forwarded from a remote cache",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts demand data reads that have any response type.",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xD4",
- "UMask": "0x4",
- "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3.",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_MISC_RETIRED.UC",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xF0",
- "UMask": "0x40",
- "BriefDescription": "L2 writebacks that access L2 cache",
+ "BriefDescription": "L1D data line replacements",
"Counter": "0,1,2,3",
- "EventName": "L2_TRANS.L2_WB",
- "PublicDescription": "Counts L2 writebacks that access L2 cache.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x51",
+ "EventName": "L1D.REPLACEMENT",
+ "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xF1",
- "UMask": "0x1f",
- "BriefDescription": "L2 cache lines filling L2",
+ "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "L2_LINES_IN.ALL",
- "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xF2",
- "UMask": "0x1",
- "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared state. A non-threaded event.",
+ "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.",
"Counter": "0,1,2,3",
- "EventName": "L2_LINES_OUT.SILENT",
- "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD4",
+ "EventName": "MEM_LOAD_MISC_RETIRED.UC",
+ "PEBS": "1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x4"
},
{
- "EventCode": "0xF2",
- "UMask": "0x2",
- "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. Clean lines may either be allocated in L3 or dropped",
+ "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready",
"Counter": "0,1,2,3",
- "EventName": "L2_LINES_OUT.NON_SILENT",
- "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD1",
+ "EventName": "MEM_LOAD_RETIRED.FB_HIT",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x40"
},
{
- "EventCode": "0xF2",
- "UMask": "0x4",
"BriefDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF",
- "Deprecated": "1",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "Deprecated": "1",
+ "EventCode": "0xF2",
"EventName": "L2_LINES_OUT.USELESS_PREF",
- "PublicDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF",
"SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x4"
},
{
- "EventCode": "0xF2",
- "UMask": "0x4",
- "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache",
+ "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache",
"Counter": "0,1,2,3",
- "EventName": "L2_LINES_OUT.USELESS_HWPF",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.PF_HIT",
+ "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.",
"SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0xd8"
},
{
- "EventCode": "0xF4",
- "UMask": "0x10",
- "BriefDescription": "Number of cache line split locks sent to uncore.",
+ "BriefDescription": "Counts all demand & prefetch RFOs that have any response type.",
"Counter": "0,1,2,3",
- "EventName": "SQ_MISC.SPLIT_LOCK",
- "PublicDescription": "Counts the number of cache line split locks sent to the uncore.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
- "Offcore": "1",
+ "CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand data reads have any response type.",
- "MSRValue": "0x0000010001",
- "Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand data reads have any response type.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
- },
- {
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010122",
"Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand data reads TBD TBD",
- "MSRValue": "0x01003C0001",
- "Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand data reads TBD TBD",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand data reads TBD TBD",
- "MSRValue": "0x04003C0001",
+ "BriefDescription": "Demand Data Read miss L2, no rejects",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand data reads TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
+ "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x21"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand data reads TBD TBD",
- "MSRValue": "0x10003C0001",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand data reads TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand data reads TBD TBD",
- "MSRValue": "0x3F803C0001",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand data reads TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all demand data writes (RFOs) have any response type.",
- "MSRValue": "0x0000010002",
+ "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all demand data writes (RFOs) have any response type.",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE",
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x8"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD",
- "MSRValue": "0x01003C0002",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD",
- "MSRValue": "0x04003C0002",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD",
- "MSRValue": "0x10003C0002",
+ "BriefDescription": "All retired load instructions.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD0",
+ "EventName": "MEM_INST_RETIRED.ALL_LOADS",
+ "PEBS": "1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x81"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD",
- "MSRValue": "0x3F803C0002",
+ "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
+ "PEBS": "1",
+ "SampleAfterValue": "20011",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any response type.",
- "MSRValue": "0x0000010004",
+ "BriefDescription": "Demand Data Read requests",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any response type.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+ "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xe1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD",
- "MSRValue": "0x01003C0004",
+ "BriefDescription": "All L2 requests",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.REFERENCES",
+ "PublicDescription": "All L2 requests.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xff"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD",
- "MSRValue": "0x04003C0004",
+ "BriefDescription": "Cycles with L1D load Misses outstanding.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+ "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD",
- "MSRValue": "0x10003C0004",
+ "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+ "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD",
- "MSRValue": "0x3F803C0004",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads have any response type.",
- "MSRValue": "0x0000010010",
+ "BriefDescription": "Number of cache line split locks sent to uncore.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads have any response type.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xF4",
+ "EventName": "SQ_MISC.SPLIT_LOCK",
+ "PublicDescription": "Counts the number of cache line split locks sent to the uncore.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x10"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD",
- "MSRValue": "0x01003C0010",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD",
- "MSRValue": "0x04003C0010",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD",
- "MSRValue": "0x10003C0010",
+ "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.CODE_RD_HIT",
+ "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xc4"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD",
- "MSRValue": "0x3F803C0010",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs have any response type.",
- "MSRValue": "0x0000010020",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that have any response type.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs have any response type.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD",
- "MSRValue": "0x01003C0020",
+ "BriefDescription": "Retired load instructions with L2 cache hits as data sources",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD1",
+ "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions with L2 cache hits as data sources.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x2"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD",
- "MSRValue": "0x04003C0020",
+ "BriefDescription": "RFO requests that hit L2 cache",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.RFO_HIT",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xc2"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD",
- "MSRValue": "0x10003C0020",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD",
- "MSRValue": "0x3F803C0020",
+ "BriefDescription": "L1D miss outstandings duration in cycles",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.PENDING",
+ "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads have any response type.",
- "MSRValue": "0x0000010080",
+ "BriefDescription": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads have any response type.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x08003C0491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD",
- "MSRValue": "0x01003C0080",
+ "BriefDescription": "Demand Data Read requests that hit L2 cache",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+ "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache",
+ "SampleAfterValue": "200003",
+ "UMask": "0xc1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD",
- "MSRValue": "0x04003C0080",
+ "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.",
+ "SampleAfterValue": "20011",
+ "UMask": "0x4"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD",
- "MSRValue": "0x10003C0080",
+ "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD",
- "MSRValue": "0x3F803C0080",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs have any response type.",
- "MSRValue": "0x0000010100",
+ "BriefDescription": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs have any response type.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x08003C0080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD",
- "MSRValue": "0x01003C0100",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD",
- "MSRValue": "0x04003C0100",
+ "BriefDescription": "Retired store instructions that split across a cacheline boundary.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD0",
+ "EventName": "MEM_INST_RETIRED.SPLIT_STORES",
+ "L1_Hit_Indication": "1",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x42"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD",
- "MSRValue": "0x10003C0100",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD",
- "MSRValue": "0x3F803C0100",
+ "BriefDescription": "Any memory transaction that reached the SQ.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xB0",
+ "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
+ "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x80"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests have any response type.",
- "MSRValue": "0x0000010400",
+ "BriefDescription": "Cacheable and noncachaeble code read requests",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests have any response type.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xB0",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+ "PublicDescription": "Counts both cacheable and non-cacheable code read requests.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x2"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD",
- "MSRValue": "0x01003C0400",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD",
- "MSRValue": "0x04003C0400",
+ "BriefDescription": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x08003C0004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD",
- "MSRValue": "0x10003C0400",
+ "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+ "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD",
- "MSRValue": "0x3F803C0400",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that have any response type.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD have any response type.",
- "MSRValue": "0x0000010490",
+ "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD have any response type.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x01003C0490",
+ "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x04003C0490",
+ "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.FB_FULL",
+ "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x10003C0490",
+ "BriefDescription": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x08003C0020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x3F803C0490",
+ "BriefDescription": "Counts all demand code reads that hit in the L3.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD have any response type.",
- "MSRValue": "0x0000010120",
+ "BriefDescription": "Counts prefetch RFOs that hit in the L3.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD have any response type.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x01003C0120",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x04003C0120",
+ "BriefDescription": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x08003C0400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x10003C0120",
+ "BriefDescription": "Retired load instructions with L1 cache hits as data sources",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD1",
+ "EventName": "MEM_LOAD_RETIRED.L1_HIT",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x3F803C0120",
+ "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD have any response type.",
- "MSRValue": "0x0000010491",
+ "BriefDescription": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD have any response type.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x08003C0490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x01003C0491",
+ "BriefDescription": "Retired load instructions with locked access.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD0",
+ "EventName": "MEM_INST_RETIRED.LOCK_LOADS",
+ "PEBS": "1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x21"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x04003C0491",
+ "BriefDescription": "Demand and prefetch data reads",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xB0",
+ "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+ "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x8"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x10003C0491",
+ "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD3",
+ "EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM",
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions which data sources missed L3 but serviced from local DRAM.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x3F803C0491",
+ "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD have any response type.",
- "MSRValue": "0x0000010122",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD have any response type.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x01003C0122",
+ "BriefDescription": "Retired load instructions that split across a cacheline boundary.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD0",
+ "EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x41"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x04003C0122",
+ "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xB2",
+ "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+ "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.Note: Writeback pending FIFO has six entries.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x10003C0122",
+ "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "6",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x3F803C0122",
+ "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand data reads",
- "MSRValue": "0x08007C0001",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
- "PublicDescription": "Counts demand data reads",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all demand data writes (RFOs)",
- "MSRValue": "0x08007C0002",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
- "PublicDescription": "Counts all demand data writes (RFOs)",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
- "MSRValue": "0x08007C0004",
+ "BriefDescription": "Counts prefetch RFOs that have any response type.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
- "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
- "MSRValue": "0x08007C0010",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
- "MSRValue": "0x08007C0020",
+ "BriefDescription": "Retired store instructions that miss the STLB.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD0",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
+ "L1_Hit_Indication": "1",
+ "PEBS": "1",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x12"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
- "MSRValue": "0x08007C0080",
+ "BriefDescription": "RFO requests to L2 cache",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_RFO",
+ "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xe2"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
- "MSRValue": "0x08007C0100",
+ "BriefDescription": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x08003C0010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
- "MSRValue": "0x08007C0400",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD",
- "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD",
- "MSRValue": "0x08007C0490",
+ "BriefDescription": "Retired load instructions missed L2 cache as data sources",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
- "PublicDescription": "TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD1",
+ "EventName": "MEM_LOAD_RETIRED.L2_MISS",
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions missed L2 cache as data sources.",
+ "SampleAfterValue": "50021",
+ "UMask": "0x10"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD",
- "MSRValue": "0x08007C0120",
+ "BriefDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
- "PublicDescription": "TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD",
- "MSRValue": "0x08007C0491",
+ "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
- "PublicDescription": "TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+ "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD",
- "MSRValue": "0x08007C0122",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
- "PublicDescription": "TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
}
] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json
index c5d0babe89fc..e197cde15047 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json
@@ -1,85 +1,85 @@
[
{
- "EventCode": "0xC7",
- "UMask": "0x1",
- "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT14 RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"Counter": "0,1,2,3",
- "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x4"
},
{
- "EventCode": "0xC7",
- "UMask": "0x2",
- "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "BriefDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 8 calculations per element.",
"Counter": "0,1,2,3",
- "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x40"
},
{
- "EventCode": "0xC7",
- "UMask": "0x4",
- "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"Counter": "0,1,2,3",
- "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0xC7",
- "UMask": "0x8",
- "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"Counter": "0,1,2,3",
- "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x10"
},
{
- "EventCode": "0xC7",
- "UMask": "0x10",
- "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "BriefDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 16 calculations per element.",
"Counter": "0,1,2,3",
- "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x80"
},
{
- "EventCode": "0xC7",
- "UMask": "0x20",
- "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x20"
},
{
- "EventCode": "0xC7",
- "UMask": "0x40",
- "BriefDescription": "Number of Packed Double-Precision FP arithmetic instructions (Use operation multiplier of 8)",
+ "BriefDescription": "Cycles with any input/output SSE or FP assist",
"Counter": "0,1,2,3",
- "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0xCA",
+ "EventName": "FP_ASSIST.ANY",
+ "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1e"
},
{
- "EventCode": "0xC7",
- "UMask": "0x80",
- "BriefDescription": "Number of Packed Single-Precision FP arithmetic instructions (Use operation multiplier of 16)",
+ "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"Counter": "0,1,2,3",
- "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xCA",
- "UMask": "0x1e",
- "BriefDescription": "Cycles with any input/output SSE or FP assist",
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"Counter": "0,1,2,3",
- "EventName": "FP_ASSIST.ANY",
- "CounterMask": "1",
- "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8"
}
] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
index 4dc583cfb545..cdf95bd2a73d 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
@@ -1,482 +1,516 @@
[
{
- "EventCode": "0x79",
- "UMask": "0x4",
- "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.",
"Counter": "0,1,2,3",
- "EventName": "IDQ.MITE_CYCLES",
- "CounterMask": "1",
- "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x80",
+ "EventName": "ICACHE_16B.IFDATA_STALL",
+ "PublicDescription": "Cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x4"
},
{
- "EventCode": "0x79",
- "UMask": "0x4",
- "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "BriefDescription": "Retired Instructions who experienced iTLB true miss.",
"Counter": "0,1,2,3",
- "EventName": "IDQ.MITE_UOPS",
- "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.ITLB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x14",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x408006",
+ "PEBS": "1",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x9C",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+ "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x79",
- "UMask": "0x8",
"BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path",
"Counter": "0,1,2,3",
- "EventName": "IDQ.DSB_CYCLES",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.DSB_CYCLES",
"PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x8"
},
{
- "EventCode": "0x79",
- "UMask": "0x8",
- "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+ "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
"Counter": "0,1,2,3",
- "EventName": "IDQ.DSB_UOPS",
- "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "3",
+ "EventCode": "0x9C",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+ "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x79",
- "UMask": "0x10",
- "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
"Counter": "0,1,2,3",
- "EventName": "IDQ.MS_DSB_CYCLES",
- "CounterMask": "1",
- "PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xE6",
+ "EventName": "BACLEARS.ANY",
+ "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x79",
- "UMask": "0x18",
- "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+ "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss.",
"Counter": "0,1,2,3",
- "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
- "CounterMask": "1",
- "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.DSB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x11",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0x79",
- "UMask": "0x18",
- "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+ "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
"Counter": "0,1,2,3",
- "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"CounterMask": "4",
- "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+ "EventCode": "0x9C",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+ "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x79",
- "UMask": "0x20",
- "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.",
"Counter": "0,1,2,3",
- "EventName": "IDQ.MS_MITE_UOPS",
- "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x401006",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0x79",
- "UMask": "0x24",
- "BriefDescription": "Cycles MITE is delivering any Uop",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
"Counter": "0,1,2,3",
- "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
- "CounterMask": "1",
- "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MITE_UOPS",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x4"
},
{
- "EventCode": "0x79",
- "UMask": "0x24",
- "BriefDescription": "Cycles MITE is delivering 4 Uops",
+ "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
"Counter": "0,1,2,3",
- "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
- "CounterMask": "4",
- "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "2",
+ "EventCode": "0x9C",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+ "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x79",
- "UMask": "0x30",
"BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
"Counter": "0,1,2,3",
- "EventName": "IDQ.MS_CYCLES",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MS_CYCLES",
"PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x30"
},
{
- "EventCode": "0x79",
- "UMask": "0x30",
- "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "BriefDescription": "Cycles MITE is delivering any Uop",
"Counter": "0,1,2,3",
- "EventName": "IDQ.MS_UOPS",
- "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+ "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x24"
+ },
+ {
+ "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x83",
+ "EventName": "ICACHE_64B.IFTAG_HIT",
+ "SampleAfterValue": "200003",
+ "UMask": "0x1"
},
{
- "EdgeDetect": "1",
- "EventCode": "0x79",
- "UMask": "0x30",
"BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer",
"Counter": "0,1,2,3",
- "EventName": "IDQ.MS_SWITCHES",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MS_SWITCHES",
"PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x30"
},
{
- "EventCode": "0x80",
- "UMask": "0x4",
- "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.",
+ "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.",
"Counter": "0,1,2,3",
- "EventName": "ICACHE_16B.IFDATA_STALL",
- "PublicDescription": "Cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.L2_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x13",
+ "PEBS": "1",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0x83",
- "UMask": "0x1",
- "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
"Counter": "0,1,2,3",
- "EventName": "ICACHE_64B.IFTAG_HIT",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MITE_CYCLES",
+ "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4"
},
{
- "EventCode": "0x83",
- "UMask": "0x2",
- "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.",
"Counter": "0,1,2,3",
- "EventName": "ICACHE_64B.IFTAG_MISS",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x404006",
+ "PEBS": "1",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0x83",
- "UMask": "0x4",
- "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+ "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
"Counter": "0,1,2,3",
- "EventName": "ICACHE_64B.IFTAG_STALL",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
- "Invert": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0x9C",
- "UMask": "0x1",
- "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
- "Counter": "0,1,2,3",
- "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
- "CounterMask": "1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+ "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x9C",
- "UMask": "0x1",
- "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+ "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
"Counter": "0,1,2,3",
- "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
- "CounterMask": "1",
- "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MS_MITE_UOPS",
+ "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x20"
},
{
- "EventCode": "0x9C",
- "UMask": "0x1",
- "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
"Counter": "0,1,2,3",
- "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
- "CounterMask": "2",
- "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x83",
+ "EventName": "ICACHE_64B.IFTAG_STALL",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4"
},
{
- "EventCode": "0x9C",
- "UMask": "0x1",
- "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
"Counter": "0,1,2,3",
- "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
- "CounterMask": "3",
- "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xAB",
+ "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+ "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0x9C",
- "UMask": "0x1",
- "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
"Counter": "0,1,2,3",
- "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
- "CounterMask": "4",
- "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+ "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x18"
},
{
- "EventCode": "0x9C",
- "UMask": "0x1",
- "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
+ "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.",
"Counter": "0,1,2,3",
- "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
- "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding \u201c4 \u2013 x\u201d when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.STLB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x15",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0xAB",
- "UMask": "0x2",
- "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
"Counter": "0,1,2,3",
- "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
- "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0\u20132 cycles.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x79",
+ "EventName": "IDQ.DSB_UOPS",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x8"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x400406",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
"MSRIndex": "0x3F7",
- "TakenAlone": "1",
+ "MSRValue": "0x420006",
+ "PEBS": "1",
"SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x200206",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
"MSRIndex": "0x3F7",
+ "MSRValue": "0x400806",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.",
+ "SampleAfterValue": "100007",
"TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_1",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x400106",
+ "PEBS": "2",
+ "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.",
"SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x400206",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall.",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_2",
"MSRIndex": "0x3F7",
- "TakenAlone": "1",
+ "MSRValue": "0x400206",
+ "PEBS": "1",
"SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x15",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.STLB_MISS",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
"MSRIndex": "0x3F7",
- "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
- "TakenAlone": "1",
+ "MSRValue": "0x400406",
+ "PEBS": "1",
"SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired Instructions who experienced iTLB true miss. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x14",
+ "BriefDescription": "Cycles MITE is delivering 4 Uops",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.ITLB_MISS",
- "MSRIndex": "0x3F7",
- "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.",
- "TakenAlone": "1",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "4",
+ "EventCode": "0x79",
+ "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+ "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x24"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x13",
+ "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.L2_MISS",
- "MSRIndex": "0x3F7",
- "TakenAlone": "1",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MS_DSB_CYCLES",
+ "PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x12",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.L1I_MISS",
- "MSRIndex": "0x3F7",
- "TakenAlone": "1",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MS_UOPS",
+ "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x30"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x11",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.DSB_MISS",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
"MSRIndex": "0x3F7",
- "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
- "TakenAlone": "1",
+ "MSRValue": "0x410006",
+ "PEBS": "1",
"SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
"EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x200206",
"PEBS": "1",
- "MSRValue": "0x300206",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3",
"MSRIndex": "0x3F7",
- "TakenAlone": "1",
+ "MSRValue": "0x300206",
+ "PEBS": "1",
"SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x100206",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
"MSRIndex": "0x3F7",
+ "MSRValue": "0x100206",
+ "PEBS": "1",
"PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.",
- "TakenAlone": "1",
"SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
- },
- {
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x420006",
- "Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
- "MSRIndex": "0x3F7",
"TakenAlone": "1",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x410006",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
- "MSRIndex": "0x3F7",
- "TakenAlone": "1",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "4",
+ "EventCode": "0x79",
+ "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+ "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x18"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x408006",
+ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
- "MSRIndex": "0x3F7",
- "TakenAlone": "1",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xAB",
+ "EventName": "DSB2MITE_SWITCHES.COUNT",
+ "PublicDescription": "This event counts the number of the Decode Stream Buffer (DSB)-to-MITE switches including all misses because of missing Decode Stream Buffer (DSB) cache and u-arch forced misses.\nNote: Invoking MITE requires two or three cycles delay.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x404006",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
"MSRIndex": "0x3F7",
- "TakenAlone": "1",
+ "MSRValue": "0x402006",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.",
"SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x402006",
+ "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
- "MSRIndex": "0x3F7",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.",
- "TakenAlone": "1",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x9C",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+ "Invert": "1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x401006",
+ "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
- "MSRIndex": "0x3F7",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.",
- "TakenAlone": "1",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x83",
+ "EventName": "ICACHE_64B.IFTAG_MISS",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.",
- "PEBS": "1",
- "MSRValue": "0x400806",
+ "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.L1I_MISS",
"MSRIndex": "0x3F7",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.",
- "TakenAlone": "1",
+ "MSRValue": "0x12",
+ "PEBS": "1",
"SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "TakenAlone": "1",
+ "UMask": "0x1"
}
] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/memory.json b/tools/perf/pmu-events/arch/x86/skylakex/memory.json
index 48a9cdf81307..6c3fd89d204d 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/memory.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/memory.json
@@ -1,1396 +1,1403 @@
[
{
- "EventCode": "0x54",
- "UMask": "0x1",
- "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from remote dram.",
"Counter": "0,1,2,3",
- "EventName": "TX_MEM.ABORT_CONFLICT",
- "PublicDescription": "Number of times a TSX line had a cache conflict.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063B800122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x54",
- "UMask": "0x2",
- "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss in the L3.",
"Counter": "0,1,2,3",
- "EventName": "TX_MEM.ABORT_CAPACITY",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBC000122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x54",
- "UMask": "0x4",
- "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer",
+ "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.",
"Counter": "0,1,2,3",
- "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
- "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "6",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x10"
},
{
- "EventCode": "0x54",
- "UMask": "0x8",
- "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from remote dram.",
"Counter": "0,1,2,3",
- "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
- "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063B800020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x54",
- "UMask": "0x10",
- "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3.",
"Counter": "0,1,2,3",
- "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
- "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBC000100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x54",
- "UMask": "0x20",
- "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
- "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x103FC00002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x54",
- "UMask": "0x40",
- "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.",
+ "BriefDescription": "Number of times an HLE execution aborted due to hardware timer expiration.",
"Counter": "0,1,2,3",
- "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
- "PublicDescription": "Number of times we could not allocate Lock Buffer.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC8",
+ "EventName": "HLE_RETIRED.ABORTED_TIMER",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x10"
},
{
- "EventCode": "0x5d",
- "UMask": "0x1",
- "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
+ "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from remote dram.",
"Counter": "0,1,2,3",
- "EventName": "TX_EXEC.MISC1",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063B800490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x5d",
- "UMask": "0x2",
- "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region",
+ "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
"Counter": "0,1,2,3",
- "EventName": "TX_EXEC.MISC2",
- "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC9",
+ "EventName": "RTM_RETIRED.ABORTED_MEM",
+ "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x8"
},
{
- "EventCode": "0x5d",
- "UMask": "0x4",
- "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded",
+ "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from remote dram.",
"Counter": "0,1,2,3",
- "EventName": "TX_EXEC.MISC3",
- "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063B800004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x5d",
- "UMask": "0x8",
- "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.",
+ "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
"Counter": "0,1,2,3",
- "EventName": "TX_EXEC.MISC4",
- "PublicDescription": "RTM region detected inside HLE.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC9",
+ "EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
+ "PublicDescription": "Number of times an RTM execution aborted due to incompatible memory type.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x40"
},
{
- "EventCode": "0x5d",
- "UMask": "0x10",
- "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss in the L3.",
"Counter": "0,1,2,3",
- "EventName": "TX_EXEC.MISC5",
- "PublicDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBC000002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x60",
- "UMask": "0x10",
- "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.",
+ "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local or remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063FC00120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x60",
- "UMask": "0x10",
- "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.",
+ "BriefDescription": "Counts all prefetch data reads that miss in the L3.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6",
- "CounterMask": "6",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBC000490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x60",
- "UMask": "0x10",
- "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.",
+ "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
- "CounterMask": "1",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0604000120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xA3",
- "UMask": "0x2",
- "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.",
+ "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local dram.",
"Counter": "0,1,2,3",
- "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS",
- "CounterMask": "2",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0604000490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xA3",
- "UMask": "0x6",
- "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
+ "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.",
"Counter": "0,1,2,3",
- "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
- "CounterMask": "6",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "2",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0xB0",
- "UMask": "0x10",
- "BriefDescription": "Demand Data Read requests who miss L3 cache",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
- "PublicDescription": "Demand Data Read requests who miss L3 cache.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0604000491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xC3",
- "UMask": "0x2",
- "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+ "BriefDescription": "Counts all demand code reads that miss the L3 and clean or shared data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
- "Errata": "SKL089",
- "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:a. memory disambiguation,b. external snoop, orc. cross SMT-HW-thread snoop (stores) hitting load buffer.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x083FC00004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xC8",
- "UMask": "0x1",
- "BriefDescription": "Number of times an HLE execution started.",
+ "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local or remote dram.",
"Counter": "0,1,2,3",
- "EventName": "HLE_RETIRED.START",
- "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063FC00004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC8",
- "UMask": "0x2",
- "BriefDescription": "Number of times an HLE execution successfully committed",
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.",
"Counter": "0,1,2,3",
- "EventName": "HLE_RETIRED.COMMIT",
- "PublicDescription": "Number of times HLE commit succeeded.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x20",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC8",
- "UMask": "0x4",
- "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).",
- "PEBS": "1",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the modified data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "HLE_RETIRED.ABORTED",
- "PublicDescription": "Number of times HLE abort was triggered. (PEBS)",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x103FC00020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC8",
- "UMask": "0x8",
- "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+ "BriefDescription": "Counts all prefetch data reads that miss the L3 and the modified data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "HLE_RETIRED.ABORTED_MEM",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x103FC00490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC8",
- "UMask": "0x10",
- "BriefDescription": "Number of times an HLE execution aborted due to hardware timer expiration.",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3.",
"Counter": "0,1,2,3",
- "EventName": "HLE_RETIRED.ABORTED_TIMER",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBC000020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC8",
- "UMask": "0x20",
- "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).",
+ "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer",
"Counter": "0,1,2,3",
- "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY",
- "PublicDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
+ "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x4"
},
{
- "EventCode": "0xC8",
- "UMask": "0x40",
- "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type",
+ "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address",
"Counter": "0,1,2,3",
- "EventName": "HLE_RETIRED.ABORTED_MEMTYPE",
- "PublicDescription": "Number of times an HLE execution aborted due to incompatible memory type.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_CONFLICT",
+ "PublicDescription": "Number of times a TSX line had a cache conflict.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xC8",
- "UMask": "0x80",
- "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss in the L3.",
"Counter": "0,1,2,3",
- "EventName": "HLE_RETIRED.ABORTED_EVENTS",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBC000400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC9",
- "UMask": "0x1",
- "BriefDescription": "Number of times an RTM execution started.",
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.",
"Counter": "0,1,2,3",
- "EventName": "RTM_RETIRED.START",
- "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x40",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "2003",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC9",
- "UMask": "0x2",
- "BriefDescription": "Number of times an RTM execution successfully committed",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and clean or shared data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "RTM_RETIRED.COMMIT",
- "PublicDescription": "Number of times RTM commit succeeded.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x083FC00002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC9",
- "UMask": "0x4",
- "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).",
- "PEBS": "1",
+ "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
"Counter": "0,1,2,3",
- "EventName": "RTM_RETIRED.ABORTED",
- "PublicDescription": "Number of times RTM abort was triggered. (PEBS)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC8",
+ "EventName": "HLE_RETIRED.ABORTED_MEM",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x8"
},
{
- "EventCode": "0xC9",
- "UMask": "0x8",
- "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
+ "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.",
"Counter": "0,1,2,3",
- "EventName": "RTM_RETIRED.ABORTED_MEM",
- "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
+ "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x8"
},
{
- "EventCode": "0xC9",
- "UMask": "0x10",
- "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the modified data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "RTM_RETIRED.ABORTED_TIMER",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x103FC00080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC9",
- "UMask": "0x20",
- "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
+ "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region",
"Counter": "0,1,2,3",
- "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY",
- "PublicDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x5d",
+ "EventName": "TX_EXEC.MISC5",
+ "PublicDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x10"
},
{
- "EventCode": "0xC9",
- "UMask": "0x40",
- "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
+ "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.",
"Counter": "0,1,2,3",
- "EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
- "PublicDescription": "Number of times an RTM execution aborted due to incompatible memory type.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x5d",
+ "EventName": "TX_EXEC.MISC4",
+ "PublicDescription": "RTM region detected inside HLE.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x8"
},
{
- "EventCode": "0xC9",
- "UMask": "0x80",
- "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
+ "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded",
"Counter": "0,1,2,3",
- "EventName": "RTM_RETIRED.ABORTED_EVENTS",
- "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x5d",
+ "EventName": "TX_EXEC.MISC3",
+ "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x4"
},
{
- "EventCode": "0xCD",
- "UMask": "0x1",
- "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.",
- "PEBS": "2",
- "MSRValue": "0x200",
+ "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region",
"Counter": "0,1,2,3",
- "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
- "MSRIndex": "0x3F6",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.",
- "TakenAlone": "1",
- "SampleAfterValue": "101",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x5d",
+ "EventName": "TX_EXEC.MISC2",
+ "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "EventCode": "0xCD",
- "UMask": "0x1",
- "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.",
- "PEBS": "2",
- "MSRValue": "0x100",
+ "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
"Counter": "0,1,2,3",
- "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
- "MSRIndex": "0x3F6",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.",
- "TakenAlone": "1",
- "SampleAfterValue": "503",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x5d",
+ "EventName": "TX_EXEC.MISC1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xCD",
- "UMask": "0x1",
- "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
- "PEBS": "2",
- "MSRValue": "0x80",
+ "BriefDescription": "Number of times an RTM execution successfully committed",
"Counter": "0,1,2,3",
- "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
- "MSRIndex": "0x3F6",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.",
- "TakenAlone": "1",
- "SampleAfterValue": "1009",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC9",
+ "EventName": "RTM_RETIRED.COMMIT",
+ "PublicDescription": "Number of times RTM commit succeeded.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "EventCode": "0xCD",
- "UMask": "0x1",
- "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.",
- "PEBS": "2",
- "MSRValue": "0x40",
+ "BriefDescription": "Counts prefetch RFOs that miss in the L3.",
"Counter": "0,1,2,3",
- "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
- "MSRIndex": "0x3F6",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.",
- "TakenAlone": "1",
- "SampleAfterValue": "2003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBC000120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xCD",
- "UMask": "0x1",
- "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.",
- "PEBS": "2",
- "MSRValue": "0x20",
+ "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.",
"Counter": "0,1,2,3",
- "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
- "MSRIndex": "0x3F6",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.",
- "TakenAlone": "1",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10"
},
{
- "EventCode": "0xCD",
- "UMask": "0x1",
- "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.",
- "PEBS": "2",
- "MSRValue": "0x10",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram.",
"Counter": "0,1,2,3",
- "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
- "MSRIndex": "0x3F6",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.",
- "TakenAlone": "1",
- "SampleAfterValue": "20011",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063B800491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xCD",
- "UMask": "0x1",
- "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.",
- "PEBS": "2",
- "MSRValue": "0x8",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from remote dram.",
"Counter": "0,1,2,3",
- "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
- "MSRIndex": "0x3F6",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.",
- "TakenAlone": "1",
- "SampleAfterValue": "50021",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063B800080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xCD",
- "UMask": "0x1",
- "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.",
- "PEBS": "2",
- "MSRValue": "0x4",
+ "BriefDescription": "Counts all prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
- "MSRIndex": "0x3F6",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.",
- "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x083FC00490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand data reads TBD TBD",
- "MSRValue": "0x3FBC000001",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand data reads TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0604000010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand data reads TBD",
- "MSRValue": "0x083FC00001",
+ "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand data reads TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC9",
+ "EventName": "RTM_RETIRED.ABORTED_TIMER",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand data reads TBD",
- "MSRValue": "0x103FC00001",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local or remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand data reads TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063FC00020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand data reads TBD",
- "MSRValue": "0x063FC00001",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local or remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand data reads TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063FC00002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand data reads TBD",
- "MSRValue": "0x063B800001",
+ "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local or remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand data reads TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063FC00490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand data reads TBD",
- "MSRValue": "0x0604000001",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand data reads TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063B800100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD",
- "MSRValue": "0x3FBC000002",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the modified data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x103FC00010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all demand data writes (RFOs) TBD",
- "MSRValue": "0x083FC00002",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local or remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all demand data writes (RFOs) TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063FC00010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all demand data writes (RFOs) TBD",
- "MSRValue": "0x103FC00002",
+ "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HITM",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all demand data writes (RFOs) TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC8",
+ "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x20"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all demand data writes (RFOs) TBD",
- "MSRValue": "0x063FC00002",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all demand data writes (RFOs) TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063B800400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all demand data writes (RFOs) TBD",
- "MSRValue": "0x063B800002",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all demand data writes (RFOs) TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x083FC00122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all demand data writes (RFOs) TBD",
- "MSRValue": "0x0604000002",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the modified data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all demand data writes (RFOs) TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x103FC00122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD",
- "MSRValue": "0x3FBC000004",
+ "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063B800001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD",
- "MSRValue": "0x083FC00004",
+ "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0604000001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD",
- "MSRValue": "0x103FC00004",
+ "BriefDescription": "Number of times an HLE execution successfully committed",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC8",
+ "EventName": "HLE_RETIRED.COMMIT",
+ "PublicDescription": "Number of times HLE commit succeeded.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD",
- "MSRValue": "0x063FC00004",
+ "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
+ "PublicDescription": "Number of times we could not allocate Lock Buffer.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x40"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD",
- "MSRValue": "0x063B800004",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and clean or shared data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x083FC00100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD",
- "MSRValue": "0x0604000004",
+ "BriefDescription": "Counts demand data reads that miss the L3 and the modified data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x103FC00001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD",
- "MSRValue": "0x3FBC000010",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0604000020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD",
- "MSRValue": "0x083FC00010",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and clean or shared data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x083FC00080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD",
- "MSRValue": "0x103FC00010",
+ "BriefDescription": "Demand Data Read requests who miss L3 cache",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xB0",
+ "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+ "PublicDescription": "Demand Data Read requests who miss L3 cache.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x10"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD",
- "MSRValue": "0x063FC00010",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local or remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063FC00080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD",
- "MSRValue": "0x063B800010",
+ "BriefDescription": "Counts demand data reads that miss in the L3.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBC000001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD",
- "MSRValue": "0x0604000010",
+ "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC9",
+ "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY",
+ "PublicDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x20"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD",
- "MSRValue": "0x3FBC000020",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and clean or shared data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x083FC00010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD",
- "MSRValue": "0x083FC00020",
+ "BriefDescription": "Counts prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x083FC00120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD",
- "MSRValue": "0x103FC00020",
+ "BriefDescription": "Counts all demand code reads that miss the L3 and the modified data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HITM",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x103FC00004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD",
- "MSRValue": "0x063FC00020",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the modified data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x103FC00100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD",
- "MSRValue": "0x063B800020",
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x100",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "503",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD",
- "MSRValue": "0x0604000020",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local or remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063FC00122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD",
- "MSRValue": "0x3FBC000080",
+ "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local or remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063FC00001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD",
- "MSRValue": "0x083FC00080",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and clean or shared data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x083FC00020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD",
- "MSRValue": "0x103FC00080",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063B800002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD",
- "MSRValue": "0x063FC00080",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x103FC00491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD",
- "MSRValue": "0x063B800080",
+ "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC9",
+ "EventName": "RTM_RETIRED.ABORTED",
+ "PEBS": "1",
+ "PublicDescription": "Number of times RTM abort was triggered.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD",
- "MSRValue": "0x0604000080",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local or remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063FC00400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD",
- "MSRValue": "0x3FBC000100",
+ "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC8",
+ "EventName": "HLE_RETIRED.ABORTED",
+ "PEBS": "1",
+ "PublicDescription": "Number of times HLE abort was triggered.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD",
- "MSRValue": "0x083FC00100",
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x10",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "20011",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD",
- "MSRValue": "0x103FC00100",
+ "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HITM",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
+ "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x20"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD",
- "MSRValue": "0x063FC00100",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and clean or shared data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x083FC00400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD",
- "MSRValue": "0x063B800100",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063B800010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD",
- "MSRValue": "0x0604000100",
+ "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD",
- "MSRValue": "0x3FBC000400",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0604000002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD",
- "MSRValue": "0x083FC00400",
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x200",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "101",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD",
- "MSRValue": "0x103FC00400",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the modified data is transferred from remote cache.",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x103FC00400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD",
- "MSRValue": "0x063FC00400",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBC000010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD",
- "MSRValue": "0x063B800400",
+ "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0604000004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD",
- "MSRValue": "0x0604000400",
+ "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_CAPACITY",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x3FBC000490",
+ "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063B800120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x083FC00490",
+ "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC8",
+ "EventName": "HLE_RETIRED.ABORTED_MEMTYPE",
+ "PublicDescription": "Number of times an HLE execution aborted due to incompatible memory type.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x40"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x103FC00490",
+ "BriefDescription": "Number of times an RTM execution started.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC9",
+ "EventName": "RTM_RETIRED.START",
+ "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x063FC00490",
+ "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "Errata": "SKL089",
+ "EventCode": "0xC3",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+ "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:a. memory disambiguation,b. external snoop, orc. cross SMT-HW-thread snoop (stores) hitting load buffer.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x2"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x063B800490",
+ "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
+ "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x0604000490",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local or remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063FC00491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x3FBC000120",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss in the L3.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBC000491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x083FC00120",
+ "BriefDescription": "Counts demand data reads that miss the L3 and clean or shared data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x083FC00001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x103FC00120",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HITM",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBC000080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x063FC00120",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0604000100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x063B800120",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0604000080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x0604000120",
+ "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC9",
+ "EventName": "RTM_RETIRED.ABORTED_EVENTS",
+ "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x80"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x3FBC000491",
+ "BriefDescription": "Number of times an HLE execution started.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC8",
+ "EventName": "HLE_RETIRED.START",
+ "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x083FC00491",
+ "BriefDescription": "Counts all demand code reads that miss in the L3.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBC000004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x103FC00491",
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HITM",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x80",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "1009",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x063FC00491",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x083FC00491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x063B800491",
+ "BriefDescription": "Counts prefetch RFOs that miss the L3 and the modified data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x103FC00120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x0604000491",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local or remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063FC00100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x3FBC000122",
+ "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "6",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x6"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x083FC00122",
+ "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC8",
+ "EventName": "HLE_RETIRED.ABORTED_EVENTS",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x80"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x103FC00122",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HITM",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0604000122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x063FC00122",
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x4",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x063B800122",
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x8",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "50021",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x0604000122",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0604000400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
}
] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/other.json b/tools/perf/pmu-events/arch/x86/skylakex/other.json
index 778a541463eb..f6b147ba8ef6 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/other.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/other.json
@@ -1,164 +1,116 @@
[
{
- "EventCode": "0x28",
- "UMask": "0x7",
- "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the Non-AVX turbo schedule.",
+ "BriefDescription": "Core cycles the core was throttled due to a pending power level request.",
"Counter": "0,1,2,3",
- "EventName": "CORE_POWER.LVL0_TURBO_LICENSE",
- "PublicDescription": "Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0x28",
- "UMask": "0x18",
- "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX2 turbo schedule.",
- "Counter": "0,1,2,3",
- "EventName": "CORE_POWER.LVL1_TURBO_LICENSE",
- "PublicDescription": "Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions.",
+ "EventName": "CORE_POWER.THROTTLE",
+ "PublicDescription": "Core cycles the out-of-order engine was throttled due to a pending power level request.",
"SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x40"
},
{
- "EventCode": "0x28",
- "UMask": "0x20",
- "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.",
+ "BriefDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly",
"Counter": "0,1,2,3",
- "EventName": "CORE_POWER.LVL2_TURBO_LICENSE",
- "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server michroarchtecture). This includes high current AVX 512-bit instructions.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xFE",
+ "EventName": "IDI_MISC.WB_DOWNGRADE",
+ "PublicDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
},
{
- "EventCode": "0x28",
- "UMask": "0x40",
- "BriefDescription": "Core cycles the core was throttled due to a pending power level request.",
+ "BriefDescription": "Number of PREFETCHW instructions executed.",
"Counter": "0,1,2,3",
- "EventName": "CORE_POWER.THROTTLE",
- "PublicDescription": "Core cycles the out-of-order engine was throttled due to a pending power level request.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0x32",
- "UMask": "0x1",
- "BriefDescription": "Number of PREFETCHNTA instructions executed.",
- "Counter": "0,1,2,3",
- "EventName": "SW_PREFETCH_ACCESS.NTA",
+ "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x8"
},
{
- "EventCode": "0x32",
- "UMask": "0x2",
- "BriefDescription": "Number of PREFETCHT0 instructions executed.",
+ "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the Non-AVX turbo schedule.",
"Counter": "0,1,2,3",
- "EventName": "SW_PREFETCH_ACCESS.T0",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x28",
+ "EventName": "CORE_POWER.LVL0_TURBO_LICENSE",
+ "PublicDescription": "Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x7"
},
{
- "EventCode": "0x32",
- "UMask": "0x4",
- "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
+ "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX2 turbo schedule.",
"Counter": "0,1,2,3",
- "EventName": "SW_PREFETCH_ACCESS.T1_T2",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x28",
+ "EventName": "CORE_POWER.LVL1_TURBO_LICENSE",
+ "PublicDescription": "Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x18"
},
{
- "EventCode": "0x32",
- "UMask": "0x8",
- "BriefDescription": "Number of PREFETCHW instructions executed.",
+ "BriefDescription": "Number of PREFETCHT0 instructions executed.",
"Counter": "0,1,2,3",
- "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x32",
+ "EventName": "SW_PREFETCH_ACCESS.T0",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0xCB",
- "UMask": "0x1",
"BriefDescription": "Number of hardware interrupts received by the processor.",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xCB",
"EventName": "HW_INTERRUPTS.RECEIVED",
"PublicDescription": "Counts the number of hardware interruptions received by the processor.",
"SampleAfterValue": "203",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
- "EventCode": "0xEF",
- "UMask": "0x1",
- "Counter": "0,1,2,3",
- "EventName": "CORE_SNOOP_RESPONSE.RSP_IHITI",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
- "EventCode": "0xEF",
- "UMask": "0x2",
- "Counter": "0,1,2,3",
- "EventName": "CORE_SNOOP_RESPONSE.RSP_IHITFSE",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
- "EventCode": "0xEF",
- "UMask": "0x4",
- "Counter": "0,1,2,3",
- "EventName": "CORE_SNOOP_RESPONSE.RSP_SHITFSE",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xEF",
- "UMask": "0x8",
+ "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.",
"Counter": "0,1,2,3",
- "EventName": "CORE_SNOOP_RESPONSE.RSP_SFWDM",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x28",
+ "EventName": "CORE_POWER.LVL2_TURBO_LICENSE",
+ "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server michroarchtecture). This includes high current AVX 512-bit instructions.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x20"
},
{
- "EventCode": "0xEF",
- "UMask": "0x10",
+ "BriefDescription": "Number of PREFETCHNTA instructions executed.",
"Counter": "0,1,2,3",
- "EventName": "CORE_SNOOP_RESPONSE.RSP_IFWDM",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x32",
+ "EventName": "SW_PREFETCH_ACCESS.NTA",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xEF",
- "UMask": "0x20",
+ "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
"Counter": "0,1,2,3",
- "EventName": "CORE_SNOOP_RESPONSE.RSP_IFWDFE",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x32",
+ "EventName": "SW_PREFETCH_ACCESS.T1_T2",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x4"
},
{
- "EventCode": "0xEF",
- "UMask": "0x40",
"Counter": "0,1,2,3",
- "EventName": "CORE_SNOOP_RESPONSE.RSP_SFWDFE",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x09",
+ "EventName": "MEMORY_DISAMBIGUATION.HISTORY_RESET",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xFE",
- "UMask": "0x2",
"BriefDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xFE",
"EventName": "IDI_MISC.WB_UPGRADE",
"PublicDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
- "EventCode": "0xFE",
- "UMask": "0x4",
- "BriefDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly",
- "Counter": "0,1,2,3",
- "EventName": "IDI_MISC.WB_DOWNGRADE",
- "PublicDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
}
] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
index 369f56c1d1b5..3bfc6943ddf9 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
@@ -1,967 +1,959 @@
[
{
- "UMask": "0x1",
- "BriefDescription": "Instructions retired from execution.",
- "Counter": "Fixed counter 0",
- "EventName": "INST_RETIRED.ANY",
- "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "Fixed counter 0"
- },
- {
- "UMask": "0x2",
- "BriefDescription": "Core cycles when the thread is not in halt state",
- "Counter": "Fixed counter 1",
- "EventName": "CPU_CLK_UNHALTED.THREAD",
- "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "Fixed counter 1"
- },
- {
- "UMask": "0x2",
- "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
- "Counter": "Fixed counter 1",
- "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
- "AnyThread": "1",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "Fixed counter 1"
+ "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "Errata": "SKL091, SKL044",
+ "EventCode": "0xC0",
+ "EventName": "INST_RETIRED.ANY_P",
+ "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).",
+ "SampleAfterValue": "2000003"
},
{
- "UMask": "0x3",
- "BriefDescription": "Reference cycles when the core is not in halt state.",
- "Counter": "Fixed counter 2",
- "EventName": "CPU_CLK_UNHALTED.REF_TSC",
- "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
+ "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+ "Invert": "1",
+ "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "Fixed counter 2"
+ "UMask": "0x1"
},
{
- "EventCode": "0x03",
- "UMask": "0x2",
- "BriefDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded .",
+ "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.",
"Counter": "0,1,2,3",
- "EventName": "LD_BLOCKS.STORE_FORWARD",
- "PublicDescription": "Counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when:a. preceding store conflicts with the load (incomplete overlap),b. store forwarding is impossible due to u-arch limitations,c. preceding lock RMW operations are not forwarded,d. store has the no-forward bit set (uncacheable/page-split/masked stores),e. all-blocking stores are used (mostly, fences and port I/O), and others.The most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events. See the table of not supported store forwards in the Optimization Guide.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA6",
+ "EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
+ "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10"
},
{
- "EventCode": "0x03",
- "UMask": "0x8",
- "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use",
+ "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
"Counter": "0,1,2,3",
- "EventName": "LD_BLOCKS.NO_SR",
- "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x14",
+ "EventName": "ARITH.DIVIDER_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x07",
- "UMask": "0x1",
"BriefDescription": "False dependencies in MOB due to partial compare on address.",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x07",
"EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
"PublicDescription": "Counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x0D",
- "UMask": "0x1",
- "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
+ "BriefDescription": "Far branch instructions retired.",
"Counter": "0,1,2,3",
- "EventName": "INT_MISC.RECOVERY_CYCLES",
- "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "Errata": "SKL091",
+ "EventCode": "0xC4",
+ "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "PEBS": "1",
+ "PublicDescription": "This event counts far branch instructions retired.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x40"
},
{
- "EventCode": "0x0D",
- "UMask": "0x1",
- "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
+ "BriefDescription": "Counts the number of x87 uops dispatched.",
"Counter": "0,1,2,3",
- "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
- "AnyThread": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.X87",
+ "PublicDescription": "Counts the number of x87 uops executed.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x10"
},
{
- "EventCode": "0x0D",
- "UMask": "0x80",
- "BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.",
+ "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.",
"Counter": "0,1,2,3",
- "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x4C",
+ "EventName": "LOAD_HIT_PRE.SW_PF",
+ "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "Invert": "1",
- "EventCode": "0x0E",
- "UMask": "0x1",
- "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
+ "BriefDescription": "Mispredicted direct and indirect near call instructions retired.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_ISSUED.STALL_CYCLES",
- "CounterMask": "1",
- "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC5",
+ "EventName": "BR_MISP_RETIRED.NEAR_CALL",
+ "PEBS": "1",
+ "PublicDescription": "Counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x2"
},
{
- "EventCode": "0x0E",
- "UMask": "0x1",
- "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
+ "BriefDescription": "Total execution stalls.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_ISSUED.ANY",
- "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "4",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x4"
},
{
- "EventCode": "0x0E",
- "UMask": "0x2",
- "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.",
+ "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH",
- "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to \u201cMixing Intel AVX and Intel SSE Code\u201d section of the Optimization Guide.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "5",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x5"
},
{
- "EventCode": "0x0E",
- "UMask": "0x20",
"BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x0E",
"EventName": "UOPS_ISSUED.SLOW_LEA",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x20"
},
{
- "EventCode": "0x14",
- "UMask": "0x1",
- "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
+ "BriefDescription": "Cycles with less than 10 actually retired uops.",
"Counter": "0,1,2,3",
- "EventName": "ARITH.DIVIDER_ACTIVE",
- "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "10",
+ "EventCode": "0xC2",
+ "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+ "Invert": "1",
+ "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0x3C",
- "UMask": "0x0",
"BriefDescription": "Thread cycles when thread is not in halt state",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x3C",
"EventName": "CPU_CLK_UNHALTED.THREAD_P",
"PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "SampleAfterValue": "2000003"
},
{
- "EventCode": "0x3C",
- "UMask": "0x0",
- "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
"Counter": "0,1,2,3",
- "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
- "AnyThread": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "2",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
+ "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
- "EdgeDetect": "1",
- "EventCode": "0x3C",
- "UMask": "0x0",
- "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.",
- "Counter": "0,1,2,3",
- "EventName": "CPU_CLK_UNHALTED.RING0_TRANS",
- "CounterMask": "1",
- "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x3C",
- "UMask": "0x1",
"BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x3C",
"EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
- "SampleAfterValue": "2503",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "SampleAfterValue": "25003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x3C",
- "UMask": "0x1",
- "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
+ "BriefDescription": "Number of machine clears (nukes) of any type.",
"Counter": "0,1,2,3",
- "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
- "AnyThread": "1",
- "SampleAfterValue": "2503",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0xC3",
+ "EventName": "MACHINE_CLEARS.COUNT",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x3C",
- "UMask": "0x1",
- "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
- "Counter": "0,1,2,3",
- "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
"AnyThread": "1",
- "SampleAfterValue": "2503",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "Counter": "Fixed counter 1",
+ "CounterHTOff": "Fixed counter 1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "EventCode": "0x3C",
- "UMask": "0x1",
- "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
+ "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
"Counter": "0,1,2,3",
- "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
- "SampleAfterValue": "2503",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.THREAD",
+ "PublicDescription": "Number of uops to be executed per-thread each cycle.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x3C",
- "UMask": "0x2",
- "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
+ "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
"Counter": "0,1,2,3",
- "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "3",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
+ "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x3C",
- "UMask": "0x2",
- "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
+ "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
"Counter": "0,1,2,3",
- "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
- "SampleAfterValue": "2503",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+ "Invert": "1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "EventCode": "0x4C",
- "UMask": "0x1",
- "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.",
+ "BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.",
"Counter": "0,1,2,3",
- "EventName": "LOAD_HIT_PRE.SW_PF",
- "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA6",
+ "EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x40"
},
{
- "EventCode": "0x59",
- "UMask": "0x1",
- "BriefDescription": "Cycles where the pipeline is stalled due to serializing operations.",
+ "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
"Counter": "0,1,2,3",
- "EventName": "PARTIAL_RAT_STALLS.SCOREBOARD",
- "PublicDescription": "This event counts cycles during which the microcode scoreboard stalls happen.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "8",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x8"
},
{
- "EdgeDetect": "1",
- "Invert": "1",
- "EventCode": "0x5E",
- "UMask": "0x1",
- "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+ "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
"Counter": "0,1,2,3",
- "EventName": "RS_EVENTS.EMPTY_END",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
- "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.",
+ "EventCode": "0xA8",
+ "EventName": "LSD.CYCLES_ACTIVE",
+ "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x5E",
- "UMask": "0x1",
- "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
"Counter": "0,1,2,3",
- "EventName": "RS_EVENTS.EMPTY_CYCLES",
- "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x0D",
+ "EventName": "INT_MISC.RECOVERY_CYCLES",
+ "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x87",
- "UMask": "0x1",
- "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+ "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
"Counter": "0,1,2,3",
- "EventName": "ILD_STALL.LCP",
- "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x3C",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+ "SampleAfterValue": "25003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xA1",
- "UMask": "0x1",
"BriefDescription": "Cycles per thread when uops are executed in port 0",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA1",
"EventName": "UOPS_DISPATCHED_PORT.PORT_0",
"PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xA1",
- "UMask": "0x2",
"BriefDescription": "Cycles per thread when uops are executed in port 1",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA1",
"EventName": "UOPS_DISPATCHED_PORT.PORT_1",
"PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0xA1",
- "UMask": "0x4",
"BriefDescription": "Cycles per thread when uops are executed in port 2",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA1",
"EventName": "UOPS_DISPATCHED_PORT.PORT_2",
"PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 2.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x4"
},
{
- "EventCode": "0xA1",
- "UMask": "0x8",
"BriefDescription": "Cycles per thread when uops are executed in port 3",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA1",
"EventName": "UOPS_DISPATCHED_PORT.PORT_3",
"PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 3.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x8"
},
{
- "EventCode": "0xA1",
- "UMask": "0x10",
"BriefDescription": "Cycles per thread when uops are executed in port 4",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA1",
"EventName": "UOPS_DISPATCHED_PORT.PORT_4",
"PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 4.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x10"
},
{
- "EventCode": "0xA1",
- "UMask": "0x20",
"BriefDescription": "Cycles per thread when uops are executed in port 5",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA1",
"EventName": "UOPS_DISPATCHED_PORT.PORT_5",
"PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x20"
},
{
- "EventCode": "0xA1",
- "UMask": "0x40",
"BriefDescription": "Cycles per thread when uops are executed in port 6",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA1",
"EventName": "UOPS_DISPATCHED_PORT.PORT_6",
"PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x40"
},
{
- "EventCode": "0xA1",
- "UMask": "0x80",
"BriefDescription": "Cycles per thread when uops are executed in port 7",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA1",
"EventName": "UOPS_DISPATCHED_PORT.PORT_7",
"PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 7.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x80"
},
{
- "EventCode": "0xa2",
- "UMask": "0x1",
- "BriefDescription": "Resource-related stall cycles",
+ "AnyThread": "1",
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
"Counter": "0,1,2,3",
- "EventName": "RESOURCE_STALLS.ANY",
- "PublicDescription": "Counts resource-related stall cycles.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x0D",
+ "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xA2",
- "UMask": "0x8",
- "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
- "Counter": "0,1,2,3",
- "EventName": "RESOURCE_STALLS.SB",
- "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
+ "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
+ "Counter": "1",
+ "CounterHTOff": "1",
+ "Errata": "SKL091, SKL044",
+ "EventCode": "0xC0",
+ "EventName": "INST_RETIRED.PREC_DIST",
+ "PEBS": "2",
+ "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xA3",
- "UMask": "0x1",
- "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
+ "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
"Counter": "0,1,2,3",
- "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
- "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "4",
+ "EventCode": "0xA8",
+ "EventName": "LSD.CYCLES_4_UOPS",
+ "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xA3",
- "UMask": "0x4",
- "BriefDescription": "Total execution stalls.",
+ "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.",
"Counter": "0,1,2,3",
- "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
- "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA6",
+ "EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
+ "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x8"
},
{
- "EventCode": "0xA3",
- "UMask": "0x5",
- "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+ "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.",
"Counter": "0,1,2,3",
- "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
- "CounterMask": "5",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.STORE_FORWARD",
+ "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
},
{
- "EventCode": "0xA3",
- "UMask": "0x8",
- "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+ "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
"Counter": "0,1,2,3",
- "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
- "CounterMask": "8",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0x5E",
+ "EventName": "RS_EVENTS.EMPTY_END",
+ "Invert": "1",
+ "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xA3",
- "UMask": "0xc",
- "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+ "AnyThread": "1",
+ "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
"Counter": "0,1,2,3",
- "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
- "CounterMask": "12",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x3C",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+ "SampleAfterValue": "25003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xA3",
- "UMask": "0x10",
- "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+ "BriefDescription": "Cycles where the pipeline is stalled due to serializing operations.",
"Counter": "0,1,2,3",
- "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
- "CounterMask": "16",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x59",
+ "EventName": "PARTIAL_RAT_STALLS.SCOREBOARD",
+ "PublicDescription": "This event counts cycles during which the microcode scoreboard stalls happen.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xA3",
- "UMask": "0x14",
- "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+ "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
"Counter": "0,1,2,3",
- "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
- "CounterMask": "20",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x0E",
+ "EventName": "UOPS_ISSUED.STALL_CYCLES",
+ "Invert": "1",
+ "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "EventCode": "0xA6",
- "UMask": "0x1",
- "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.",
+ "BriefDescription": "Not taken branch instructions retired.",
"Counter": "0,1,2,3",
- "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
- "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "Errata": "SKL091",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND_NTAKEN",
+ "PublicDescription": "This event counts not taken branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x10"
},
{
- "EventCode": "0xA6",
- "UMask": "0x2",
- "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.",
+ "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
"Counter": "0,1,2,3",
- "EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
- "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "3",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0xA6",
- "UMask": "0x4",
- "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.",
+ "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
"Counter": "0,1,2,3",
- "EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
- "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0xA6",
- "UMask": "0x8",
- "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.",
+ "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
"Counter": "0,1,2,3",
- "EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
- "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "4",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0xA6",
- "UMask": "0x10",
- "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.",
- "Counter": "0,1,2,3",
- "EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
- "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
+ "BriefDescription": "Reference cycles when the core is not in halt state.",
+ "Counter": "Fixed counter 2",
+ "CounterHTOff": "Fixed counter 2",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+ "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x3"
},
{
- "EventCode": "0xA6",
- "UMask": "0x40",
- "BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.",
+ "BriefDescription": "All mispredicted macro branch instructions retired.",
"Counter": "0,1,2,3",
- "EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC5",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+ "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+ "SampleAfterValue": "400009"
+ },
+ {
+ "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC1",
+ "EventName": "OTHER_ASSISTS.ANY",
+ "SampleAfterValue": "100003",
+ "UMask": "0x3f"
+ },
+ {
+ "BriefDescription": "Cycles without actually retired uops.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0xC2",
+ "EventName": "UOPS_RETIRED.STALL_CYCLES",
+ "Invert": "1",
+ "PublicDescription": "This event counts cycles without actually retired uops.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0xA8",
- "UMask": "0x1",
"BriefDescription": "Number of Uops delivered by the LSD.",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA8",
"EventName": "LSD.UOPS",
"PublicDescription": "Number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xA8",
- "UMask": "0x1",
- "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+ "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
"Counter": "0,1,2,3",
- "EventName": "LSD.CYCLES_4_UOPS",
- "CounterMask": "4",
- "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x3C",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "25003",
+ "UMask": "0x2"
},
{
- "EventCode": "0xA8",
- "UMask": "0x1",
- "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
"Counter": "0,1,2,3",
- "EventName": "LSD.CYCLES_ACTIVE",
- "CounterMask": "1",
- "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x87",
+ "EventName": "ILD_STALL.LCP",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xB1",
- "UMask": "0x1",
- "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
+ "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
- "CounterMask": "4",
- "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "16",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x10"
},
{
- "EventCode": "0xB1",
- "UMask": "0x1",
- "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
+ "BriefDescription": "Taken branch instructions retired.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
- "CounterMask": "3",
- "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "Errata": "SKL091",
+ "EventCode": "0xC4",
+ "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+ "PEBS": "1",
+ "PublicDescription": "This event counts taken branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x20"
},
{
- "EventCode": "0xB1",
- "UMask": "0x1",
- "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
+ "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use",
"Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
- "CounterMask": "2",
- "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.NO_SR",
+ "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8"
},
{
- "EventCode": "0xB1",
- "UMask": "0x1",
- "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
+ "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
"Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
- "CounterMask": "1",
- "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x0E",
+ "EventName": "UOPS_ISSUED.ANY",
+ "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "Invert": "1",
- "EventCode": "0xB1",
- "UMask": "0x1",
- "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
- "Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.STALL_CYCLES",
- "CounterMask": "1",
- "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
+ "BriefDescription": "Core cycles when the thread is not in halt state",
+ "Counter": "Fixed counter 1",
+ "CounterHTOff": "Fixed counter 1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0xB1",
- "UMask": "0x1",
- "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
+ "AnyThread": "1",
+ "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.THREAD",
- "PublicDescription": "Number of uops to be executed per-thread each cycle.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x3C",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+ "SampleAfterValue": "25003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xB1",
- "UMask": "0x2",
- "BriefDescription": "Number of uops executed on the core.",
+ "BriefDescription": "Direct and indirect near call instructions retired.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.CORE",
- "PublicDescription": "Number of uops executed from any thread.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "Errata": "SKL091",
+ "EventCode": "0xC4",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "PEBS": "1",
+ "PublicDescription": "This event counts both direct and indirect near call instructions retired.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x2"
},
{
- "Invert": "1",
- "EventCode": "0xB1",
- "UMask": "0x2",
- "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+ "BriefDescription": "Number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted). This event is not supported on first SKL and KBL products.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
- "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xCC",
+ "EventName": "ROB_MISC_EVENTS.PAUSE_INST",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x40"
},
{
- "EventCode": "0xB1",
- "UMask": "0x2",
- "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
+ "BriefDescription": "Resource-related stall cycles",
"Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
- "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xa2",
+ "EventName": "RESOURCE_STALLS.ANY",
+ "PublicDescription": "Counts resource-related stall cycles.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xB1",
- "UMask": "0x2",
- "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
+ "BriefDescription": "Self-modifying code (SMC) detected.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
- "CounterMask": "3",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC3",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
},
{
- "EventCode": "0xB1",
- "UMask": "0x2",
- "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
+ "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
- "CounterMask": "2",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x3C",
+ "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "25003",
+ "UMask": "0x2"
},
{
- "EventCode": "0xB1",
- "UMask": "0x2",
- "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+ "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
"Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
- "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "4",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+ "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xB1",
- "UMask": "0x10",
- "BriefDescription": "Counts the number of x87 uops dispatched.",
+ "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.X87",
- "PublicDescription": "Counts the number of x87 uops executed.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC5",
+ "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+ "PEBS": "1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x20"
},
{
- "EventCode": "0xC0",
- "UMask": "0x0",
- "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+ "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
"Counter": "0,1,2,3",
- "EventName": "INST_RETIRED.ANY_P",
- "Errata": "SKL091, SKL044",
- "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).",
+ "CounterHTOff": "0,1,2,3",
+ "CounterMask": "20",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x14"
},
{
- "EventCode": "0xC0",
- "UMask": "0x1",
- "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
- "PEBS": "2",
- "Counter": "1",
- "EventName": "INST_RETIRED.PREC_DIST",
- "Errata": "SKL091, SKL044",
- "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.",
+ "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA6",
+ "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
+ "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "1"
+ "UMask": "0x1"
},
{
- "Invert": "1",
- "EventCode": "0xC0",
- "UMask": "0x1",
"BriefDescription": "Number of cycles using always true condition applied to PEBS instructions retired event.",
- "PEBS": "2",
"Counter": "0,2,3",
- "EventName": "INST_RETIRED.TOTAL_CYCLES_PS",
+ "CounterHTOff": "0,2,3",
"CounterMask": "10",
"Errata": "SKL091, SKL044",
+ "EventCode": "0xC0",
+ "EventName": "INST_RETIRED.TOTAL_CYCLES_PS",
+ "Invert": "1",
+ "PEBS": "2",
"PublicDescription": "Number of cycles using an always true condition applied to PEBS instructions retired event. (inst_ret< 16)",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,2,3"
+ "UMask": "0x1"
},
{
- "EventCode": "0xC1",
- "UMask": "0x3f",
- "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.",
+ "BriefDescription": "Retirement slots used.",
"Counter": "0,1,2,3",
- "EventName": "OTHER_ASSISTS.ANY",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC2",
+ "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+ "PublicDescription": "Counts the retirement slots used.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "Invert": "1",
- "EventCode": "0xC2",
- "UMask": "0x2",
- "BriefDescription": "Cycles with less than 10 actually retired uops.",
+ "AnyThread": "1",
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
- "CounterMask": "10",
- "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x3C",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+ "SampleAfterValue": "2000003"
},
{
- "Invert": "1",
- "EventCode": "0xC2",
- "UMask": "0x2",
- "BriefDescription": "Cycles without actually retired uops.",
+ "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_RETIRED.STALL_CYCLES",
- "CounterMask": "1",
- "PublicDescription": "This event counts cycles without actually retired uops.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x0E",
+ "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH",
+ "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0xC2",
- "UMask": "0x2",
- "BriefDescription": "Retirement slots used.",
+ "BriefDescription": "Increments whenever there is an update to the LBR array.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
- "PublicDescription": "Counts the retirement slots used.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xCC",
+ "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+ "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x20"
},
{
- "EdgeDetect": "1",
- "EventCode": "0xC3",
- "UMask": "0x1",
- "BriefDescription": "Number of machine clears (nukes) of any type.",
+ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
"Counter": "0,1,2,3",
- "EventName": "MACHINE_CLEARS.COUNT",
- "CounterMask": "1",
- "PublicDescription": "Number of machine clears (nukes) of any type.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x5E",
+ "EventName": "RS_EVENTS.EMPTY_CYCLES",
+ "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC3",
- "UMask": "0x4",
- "BriefDescription": "Self-modifying code (SMC) detected.",
+ "BriefDescription": "Return instructions retired.",
"Counter": "0,1,2,3",
- "EventName": "MACHINE_CLEARS.SMC",
- "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "Errata": "SKL091",
+ "EventCode": "0xC4",
+ "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+ "PEBS": "1",
+ "PublicDescription": "This event counts return instructions retired.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x8"
},
{
- "EventCode": "0xC4",
- "UMask": "0x0",
- "BriefDescription": "All (macro) branch instructions retired.",
+ "BriefDescription": "Instructions retired from execution.",
+ "Counter": "Fixed counter 0",
+ "CounterHTOff": "Fixed counter 0",
+ "EventName": "INST_RETIRED.ANY",
+ "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
"Counter": "0,1,2,3",
- "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
- "Errata": "SKL091",
- "PublicDescription": "Counts all (macro) branch instructions retired.",
- "SampleAfterValue": "400009",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "2",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "EventCode": "0xC4",
- "UMask": "0x1",
- "BriefDescription": "Conditional branch instructions retired.",
- "PEBS": "1",
+ "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
"Counter": "0,1,2,3",
- "EventName": "BR_INST_RETIRED.CONDITIONAL",
- "Errata": "SKL091",
- "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.",
- "SampleAfterValue": "400009",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA2",
+ "EventName": "RESOURCE_STALLS.SB",
+ "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8"
},
{
- "EventCode": "0xC4",
- "UMask": "0x2",
- "BriefDescription": "Direct and indirect near call instructions retired.",
- "PEBS": "1",
+ "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.",
"Counter": "0,1,2,3",
- "EventName": "BR_INST_RETIRED.NEAR_CALL",
- "Errata": "SKL091",
- "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0x3C",
+ "EventName": "CPU_CLK_UNHALTED.RING0_TRANS",
+ "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).",
+ "SampleAfterValue": "100007"
},
{
- "EventCode": "0xC4",
- "UMask": "0x4",
"BriefDescription": "All (macro) branch instructions retired.",
- "PEBS": "2",
"Counter": "0,1,2,3",
- "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+ "CounterHTOff": "0,1,2,3",
"Errata": "SKL091",
+ "EventCode": "0xC4",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+ "PEBS": "2",
"PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.",
"SampleAfterValue": "400009",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x4"
},
{
- "EventCode": "0xC4",
- "UMask": "0x8",
- "BriefDescription": "Return instructions retired.",
- "PEBS": "1",
+ "BriefDescription": "Mispredicted macro branch instructions retired.",
"Counter": "0,1,2,3",
- "EventName": "BR_INST_RETIRED.NEAR_RETURN",
- "Errata": "SKL091",
- "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC5",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+ "PEBS": "2",
+ "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x4"
},
{
- "EventCode": "0xC4",
- "UMask": "0x10",
- "BriefDescription": "Counts all not taken macro branch instructions retired.",
- "PEBS": "1",
+ "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.",
"Counter": "0,1,2,3",
- "EventName": "BR_INST_RETIRED.NOT_TAKEN",
- "Errata": "SKL091",
- "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts not taken branch instructions retired.",
- "SampleAfterValue": "400009",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA6",
+ "EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
+ "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "EventCode": "0xC4",
- "UMask": "0x20",
- "BriefDescription": "Taken branch instructions retired.",
- "PEBS": "1",
+ "BriefDescription": "Not taken branch instructions retired.",
"Counter": "0,1,2,3",
- "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"Errata": "SKL091",
- "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.",
+ "EventCode": "0xC4",
+ "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+ "PublicDescription": "This event counts not taken branch instructions retired.",
"SampleAfterValue": "400009",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x10"
},
{
- "EventCode": "0xC4",
- "UMask": "0x40",
- "BriefDescription": "Counts the number of far branch instructions retired.",
- "PEBS": "1",
+ "BriefDescription": "Conditional branch instructions retired.",
"Counter": "0,1,2,3",
- "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"Errata": "SKL091",
- "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts far branch instructions retired.",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
- "EventCode": "0xC5",
- "UMask": "0x0",
- "BriefDescription": "All mispredicted macro branch instructions retired.",
- "Counter": "0,1,2,3",
- "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
- "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+ "EventCode": "0xC4",
+ "EventName": "BR_INST_RETIRED.CONDITIONAL",
+ "PEBS": "1",
+ "PublicDescription": "This event counts conditional branch instructions retired.",
"SampleAfterValue": "400009",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xC5",
- "UMask": "0x1",
"BriefDescription": "Mispredicted conditional branch instructions retired.",
- "PEBS": "1",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC5",
"EventName": "BR_MISP_RETIRED.CONDITIONAL",
- "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.",
+ "PEBS": "1",
+ "PublicDescription": "This event counts mispredicted conditional branch instructions retired.",
"SampleAfterValue": "400009",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xC5",
- "UMask": "0x2",
- "BriefDescription": "Mispredicted direct and indirect near call instructions retired.",
- "PEBS": "1",
+ "BriefDescription": "Number of uops executed on the core.",
"Counter": "0,1,2,3",
- "EventName": "BR_MISP_RETIRED.NEAR_CALL",
- "PublicDescription": "This event counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.",
- "SampleAfterValue": "400009",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE",
+ "PublicDescription": "Number of uops executed from any thread.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "EventCode": "0xC5",
- "UMask": "0x4",
- "BriefDescription": "Mispredicted macro branch instructions retired.",
- "PEBS": "2",
+ "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
"Counter": "0,1,2,3",
- "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
- "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.",
- "SampleAfterValue": "400009",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "12",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+ "SampleAfterValue": "2000003",
+ "UMask": "0xc"
},
{
- "EventCode": "0xC5",
- "UMask": "0x20",
- "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
- "PEBS": "1",
+ "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.",
"Counter": "0,1,2,3",
- "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
- "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken.",
- "SampleAfterValue": "400009",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA6",
+ "EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
+ "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4"
},
{
- "EventCode": "0xCC",
- "UMask": "0x20",
- "BriefDescription": "Increments whenever there is an update to the LBR array.",
+ "BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.",
"Counter": "0,1,2,3",
- "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
- "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x0D",
+ "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x80"
},
{
- "EventCode": "0xCC",
- "UMask": "0x40",
- "BriefDescription": "Number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted). This event is not supported on first SKL and KBL products.",
+ "BriefDescription": "All (macro) branch instructions retired.",
"Counter": "0,1,2,3",
- "EventName": "ROB_MISC_EVENTS.PAUSE_INST",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "Errata": "SKL091",
+ "EventCode": "0xC4",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "PublicDescription": "Counts all (macro) branch instructions retired.",
+ "SampleAfterValue": "400009"
+ },
+ {
+ "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+ "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xE6",
- "UMask": "0x1",
- "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+ "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
"Counter": "0,1,2,3",
- "EventName": "BACLEARS.ANY",
- "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
}
] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
index 390bdab1be9d..f31794d3b926 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
@@ -4,14 +4,14 @@
"MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)",
"MetricGroup": "TopdownL1",
"MetricName": "Frontend_Bound",
- "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound."
+ "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound."
},
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
- "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
+ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
"MetricGroup": "TopdownL1_SMT",
"MetricName": "Frontend_Bound_SMT",
- "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
+ "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
@@ -22,13 +22,14 @@
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.",
- "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
+ "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
"MetricGroup": "TopdownL1_SMT",
"MetricName": "Bad_Speculation_SMT",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU."
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
+ "MetricConstraint": "NO_NMI_WATCHDOG",
"MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )",
"MetricGroup": "TopdownL1",
"MetricName": "Backend_Bound",
@@ -36,7 +37,7 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
- "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )",
+ "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) )",
"MetricGroup": "TopdownL1_SMT",
"MetricName": "Backend_Bound_SMT",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
@@ -50,7 +51,7 @@
},
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
"MetricGroup": "TopdownL1_SMT",
"MetricName": "Retiring_SMT",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU."
@@ -58,7 +59,7 @@
{
"BriefDescription": "Instructions Per Cycle (per Logical Processor)",
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "TopDownL1",
+ "MetricGroup": "Summary",
"MetricName": "IPC"
},
{
@@ -74,24 +75,6 @@
"MetricName": "IpTB"
},
{
- "BriefDescription": "Branch instructions per taken branch. ",
- "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
- "MetricGroup": "Branches;PGO",
- "MetricName": "BpTB"
- },
- {
- "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions",
- "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1 ) )",
- "MetricGroup": "PGO;IcMiss",
- "MetricName": "IFetch_Line_Utilization"
- },
- {
- "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
- "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
- "MetricGroup": "DSB;Fetch_BW",
- "MetricName": "DSB_Coverage"
- },
- {
"BriefDescription": "Cycles Per Instruction (per Logical Processor)",
"MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
"MetricGroup": "Pipeline;Summary",
@@ -104,86 +87,104 @@
"MetricName": "CLKS"
},
{
- "BriefDescription": "Total issue-pipeline slots (per-Physical Core)",
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
"MetricExpr": "4 * cycles",
"MetricGroup": "TopDownL1",
"MetricName": "SLOTS"
},
{
- "BriefDescription": "Total issue-pipeline slots (per-Physical Core)",
- "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
"MetricGroup": "TopDownL1_SMT",
"MetricName": "SLOTS_SMT"
},
{
- "BriefDescription": "Instructions per Load (lower number means higher occurance rate)",
+ "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
"MetricGroup": "Instruction_Type",
- "MetricName": "IpL"
+ "MetricName": "IpLoad"
},
{
- "BriefDescription": "Instructions per Store (lower number means higher occurance rate)",
+ "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
"MetricGroup": "Instruction_Type",
- "MetricName": "IpS"
+ "MetricName": "IpStore"
},
{
- "BriefDescription": "Instructions per Branch (lower number means higher occurance rate)",
+ "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Branches;Instruction_Type",
- "MetricName": "IpB"
+ "MetricName": "IpBranch"
},
{
- "BriefDescription": "Instruction per (near) call (lower number means higher occurance rate)",
+ "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
"MetricGroup": "Branches",
"MetricName": "IpCall"
},
{
+ "BriefDescription": "Branch instructions per taken branch. ",
+ "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;PGO",
+ "MetricName": "BpTkBranch"
+ },
+ {
+ "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )",
+ "MetricGroup": "FLOPS;FP_Arith;Instruction_Type",
+ "MetricName": "IpFLOP"
+ },
+ {
"BriefDescription": "Total number of retired Instructions",
"MetricExpr": "INST_RETIRED.ANY",
- "MetricGroup": "Summary",
+ "MetricGroup": "Summary;TopDownL1",
"MetricName": "Instructions"
},
{
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
+ "MetricGroup": "DSB;Fetch_BW",
+ "MetricName": "DSB_Coverage"
+ },
+ {
"BriefDescription": "Instructions Per Cycle (per physical core)",
"MetricExpr": "INST_RETIRED.ANY / cycles",
- "MetricGroup": "SMT",
+ "MetricGroup": "SMT;TopDownL1",
"MetricName": "CoreIPC"
},
{
"BriefDescription": "Instructions Per Cycle (per physical core)",
- "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
- "MetricGroup": "SMT",
+ "MetricExpr": "INST_RETIRED.ANY / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
+ "MetricGroup": "SMT;TopDownL1",
"MetricName": "CoreIPC_SMT"
},
{
"BriefDescription": "Floating Point Operations Per Cycle",
- "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / cycles",
+ "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / cycles",
"MetricGroup": "FLOPS",
"MetricName": "FLOPc"
},
{
"BriefDescription": "Floating Point Operations Per Cycle",
- "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+ "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
"MetricGroup": "FLOPS_SMT",
"MetricName": "FLOPc_SMT"
},
{
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
- "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
- "MetricGroup": "Pipeline",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 )",
+ "MetricGroup": "Pipeline;Ports_Utilization",
"MetricName": "ILP"
},
{
"BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)",
- "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES",
"MetricGroup": "BrMispredicts",
"MetricName": "Branch_Misprediction_Cost"
},
{
"BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)",
- "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) * (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / BR_MISP_RETIRED.ALL_BRANCHES",
"MetricGroup": "BrMispredicts_SMT",
"MetricName": "Branch_Misprediction_Cost_SMT"
},
@@ -213,14 +214,14 @@
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricConstraint": "NO_NMI_WATCHDOG",
"MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )",
"MetricGroup": "TLB",
- "MetricName": "Page_Walks_Utilization",
- "MetricConstraint": "NO_NMI_WATCHDOG"
+ "MetricName": "Page_Walks_Utilization"
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
- "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) )",
+ "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )",
"MetricGroup": "TLB_SMT",
"MetricName": "Page_Walks_Utilization_SMT"
},
@@ -245,7 +246,7 @@
{
"BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time",
- "MetricGroup": "Memory_BW",
+ "MetricGroup": "Memory_BW;Offcore",
"MetricName": "L3_Cache_Access_BW"
},
{
@@ -263,7 +264,7 @@
{
"BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)",
"MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY",
- "MetricGroup": "Cache_Misses",
+ "MetricGroup": "Cache_Misses;Offcore",
"MetricName": "L2MPKI_All"
},
{
@@ -298,7 +299,7 @@
},
{
"BriefDescription": "Giga Floating Point Operations Per Second",
- "MetricExpr": "( (( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / 1000000000 ) / duration_time",
+ "MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / 1000000000 ) / duration_time",
"MetricGroup": "FLOPS;Summary",
"MetricName": "GFLOPs"
},
@@ -310,44 +311,56 @@
},
{
"BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
- "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 )",
"MetricGroup": "SMT;Summary",
"MetricName": "SMT_2T_Utilization"
},
{
- "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
"MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "Summary",
+ "MetricGroup": "OS",
"MetricName": "Kernel_Utilization"
},
{
"BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
"MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time",
- "MetricGroup": "Memory_BW",
+ "MetricGroup": "Memory_BW;SoC",
"MetricName": "DRAM_BW_Use"
},
{
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches",
- "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x35\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )",
- "MetricGroup": "Memory_Lat",
- "MetricName": "DRAM_Read_Latency"
+ "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x35\\,umask\\=0x21\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )",
+ "MetricGroup": "Memory_Lat;SoC",
+ "MetricName": "MEM_Read_Latency"
},
{
"BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
- "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x36\\,umask\\=0x21\\,thresh\\=1@",
- "MetricGroup": "Memory_BW",
- "MetricName": "DRAM_Parallel_Reads"
+ "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433\\,thresh\\=1@",
+ "MetricGroup": "Memory_BW;SoC",
+ "MetricName": "MEM_Parallel_Reads"
+ },
+ {
+ "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]",
+ "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3 ) * 4 / 1000000000 / duration_time",
+ "MetricGroup": "IO_BW;SoC;Server",
+ "MetricName": "IO_Write_BW"
+ },
+ {
+ "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]",
+ "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3 ) * 4 / 1000000000 / duration_time",
+ "MetricGroup": "IO_BW;SoC;Server",
+ "MetricName": "IO_Read_BW"
},
{
"BriefDescription": "Socket actual clocks when any core is active on that socket",
"MetricExpr": "cha_0@event\\=0x0@",
- "MetricGroup": "",
+ "MetricGroup": "SoC",
"MetricName": "Socket_CLKS"
},
{
- "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions. )",
+ "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
"MetricExpr": "INST_RETIRED.ANY / ( BR_INST_RETIRED.FAR_BRANCH / 2 )",
- "MetricGroup": "",
+ "MetricGroup": "Branches;OS",
"MetricName": "IpFarBranch"
},
{
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json
index 9c7e5f8beee2..b80b5d66385d 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json
@@ -94,17 +94,7 @@
"Unit": "iMC"
},
{
- "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd",
- "Counter": "0,1,2,3",
- "EventCode": "0x4",
- "EventName": "LLC_MISSES.MEM_READ",
- "PerPkg": "1",
- "ScaleUnit": "64Bytes",
- "UMask": "0x3",
- "Unit": "iMC"
- },
- {
- "BriefDescription": "All DRAM Read CAS Commands issued (does not include underfills) ",
+ "BriefDescription": "All DRAM Read CAS Commands issued (does not include underfills)",
"Counter": "0,1,2,3",
"EventCode": "0x4",
"EventName": "UNC_M_CAS_COUNT.RD_REG",
@@ -119,18 +109,18 @@
"EventCode": "0x4",
"EventName": "UNC_M_CAS_COUNT.RD_UNDERFILL",
"PerPkg": "1",
- "PublicDescription": "Counts CAS (Column Access Select) underfill read commands issued to DRAM due to a partial write, on a per channel basis. CAS commands are issued to specify the address to read or write on DRAM, and this command counts underfill reads. Partial writes must be completed by first reading in the underfill from DRAM and then merging in the partial write data before writing the full line back to DRAM. This event will generally count about the same as the number of partial writes, but may be slightly less because of partials hitting in the WPQ (due to a previous write request). ",
+ "PublicDescription": "Counts CAS (Column Access Select) underfill read commands issued to DRAM due to a partial write, on a per channel basis. CAS commands are issued to specify the address to read or write on DRAM, and this command counts underfill reads. Partial writes must be completed by first reading in the underfill from DRAM and then merging in the partial write data before writing the full line back to DRAM. This event will generally count about the same as the number of partial writes, but may be slightly less because of partials hitting in the WPQ (due to a previous write request).",
"UMask": "0x2",
"Unit": "iMC"
},
{
- "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr",
+ "BriefDescription": "DRAM CAS (Column Address Strobe) Commands.; DRAM WR_CAS (w/ and w/out auto-pre) in Write Major Mode",
"Counter": "0,1,2,3",
"EventCode": "0x4",
- "EventName": "LLC_MISSES.MEM_WRITE",
+ "EventName": "UNC_M_CAS_COUNT.WR_WMM",
"PerPkg": "1",
- "ScaleUnit": "64Bytes",
- "UMask": "0xC",
+ "PublicDescription": "Counts the total number or DRAM Write CAS commands issued on this channel while in Write-Major-Mode.",
+ "UMask": "0x4",
"Unit": "iMC"
},
{
@@ -139,7 +129,7 @@
"EventCode": "0x10",
"EventName": "UNC_M_RPQ_INSERTS",
"PerPkg": "1",
- "PublicDescription": "Counts the number of read requests allocated into the Read Pending Queue (RPQ). This queue is used to schedule reads out to the memory controller and to track the requests. Requests allocate into the RPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC. The requests deallocate after the read CAS command has been issued to DRAM. This event counts both Isochronous and non-Isochronous requests which were issued to the RPQ. ",
+ "PublicDescription": "Counts the number of read requests allocated into the Read Pending Queue (RPQ). This queue is used to schedule reads out to the memory controller and to track the requests. Requests allocate into the RPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC. The requests deallocate after the read CAS command has been issued to DRAM. This event counts both Isochronous and non-Isochronous requests which were issued to the RPQ.",
"Unit": "iMC"
},
{
@@ -166,7 +156,7 @@
"EventCode": "0x81",
"EventName": "UNC_M_WPQ_OCCUPANCY",
"PerPkg": "1",
- "PublicDescription": "Counts the number of entries in the Write Pending Queue (WPQ) at each cycle. This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations). The WPQ is used to schedule writes out to the memory controller and to track the requests.",
+ "PublicDescription": "Counts the number of entries in the Write Pending Queue (WPQ) at each cycle. This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations). The WPQ is used to schedule writes out to the memory controller and to track the requests. Requests allocate into the WPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC (memory controller). They deallocate after being issued to DRAM. Write requests themselves are able to complete (from the perspective of the rest of the system) as soon they have 'posted' to the iMC. This is not to be confused with actually performing the write to DRAM. Therefore, the average latency for this queue is actually not useful for deconstruction intermediate write latencies. So, we provide filtering based on if the request has posted or not. By using the 'not posted' filter, we can track how long writes spent in the iMC before completions were sent to the HA. The 'posted' filter, on the other hand, provides information about how much queueing is actually happenning in the iMC for writes before they are actually issued to memory. High average occupancies will generally coincide with high write major mode counts. Is there a filter of sorts???",
"Unit": "iMC"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json
index adb42c72f5c8..d7a0270de983 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json
@@ -119,18 +119,15 @@
"EventName": "UPI_DATA_BANDWIDTH_TX",
"PerPkg": "1",
"ScaleUnit": "7.11E-06Bytes",
- "UMask": "0x0F",
+ "UMask": "0xf",
"Unit": "UPI LL"
},
{
- "BriefDescription": "PCI Express bandwidth reading at IIO. Derived from unc_iio_data_req_of_cpu.mem_read.part0",
+ "BriefDescription": "PCI Express bandwidth reading at IIO, part 0",
"Counter": "0,1",
"EventCode": "0x83",
- "EventName": "LLC_MISSES.PCIE_READ",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0",
"FCMask": "0x07",
- "Filter": "ch_mask=0x1f",
- "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3",
- "MetricName": "LLC_MISSES.PCIE_READ",
"PerPkg": "1",
"PortMask": "0x01",
"ScaleUnit": "4Bytes",
@@ -138,118 +135,117 @@
"Unit": "IIO"
},
{
- "BriefDescription": "PCI Express bandwidth writing at IIO. Derived from unc_iio_data_req_of_cpu.mem_write.part0",
+ "BriefDescription": "PCI Express bandwidth reading at IIO, part 1",
"Counter": "0,1",
"EventCode": "0x83",
- "EventName": "LLC_MISSES.PCIE_WRITE",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1",
"FCMask": "0x07",
- "Filter": "ch_mask=0x1f",
- "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3",
- "MetricName": "LLC_MISSES.PCIE_WRITE",
"PerPkg": "1",
- "PortMask": "0x01",
+ "PortMask": "0x02",
"ScaleUnit": "4Bytes",
- "UMask": "0x01",
+ "UMask": "0x04",
"Unit": "IIO"
},
{
- "BriefDescription": "PCI Express bandwidth writing at IIO, part 0",
+ "BriefDescription": "PCI Express bandwidth reading at IIO, part 2",
"Counter": "0,1",
"EventCode": "0x83",
- "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2",
"FCMask": "0x07",
- "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3",
- "MetricName": "LLC_MISSES.PCIE_WRITE",
"PerPkg": "1",
- "PortMask": "0x01",
+ "PortMask": "0x04",
"ScaleUnit": "4Bytes",
- "UMask": "0x01",
+ "UMask": "0x04",
"Unit": "IIO"
},
{
- "BriefDescription": "PCI Express bandwidth writing at IIO, part 1",
+ "BriefDescription": "PCI Express bandwidth reading at IIO, part 3",
"Counter": "0,1",
"EventCode": "0x83",
- "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3",
"FCMask": "0x07",
"PerPkg": "1",
- "PortMask": "0x02",
+ "PortMask": "0x08",
"ScaleUnit": "4Bytes",
- "UMask": "0x01",
+ "UMask": "0x04",
"Unit": "IIO"
},
{
- "BriefDescription": "PCI Express bandwidth writing at IIO, part 2",
+ "BriefDescription": "PCI Express bandwidth reading at IIO. Derived from unc_iio_data_req_of_cpu.mem_read.part0",
"Counter": "0,1",
"EventCode": "0x83",
- "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2",
+ "EventName": "LLC_MISSES.PCIE_READ",
"FCMask": "0x07",
+ "Filter": "ch_mask=0x1f",
+ "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 +UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 +UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 +UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3",
+ "MetricName": "LLC_MISSES.PCIE_READ",
"PerPkg": "1",
- "PortMask": "0x04",
+ "PortMask": "0x01",
"ScaleUnit": "4Bytes",
- "UMask": "0x01",
+ "UMask": "0x04",
"Unit": "IIO"
},
{
- "BriefDescription": "PCI Express bandwidth writing at IIO, part 3",
+ "BriefDescription": "PCI Express bandwidth writing at IIO, part 0",
"Counter": "0,1",
"EventCode": "0x83",
- "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0",
"FCMask": "0x07",
"PerPkg": "1",
- "PortMask": "0x08",
+ "PortMask": "0x01",
"ScaleUnit": "4Bytes",
"UMask": "0x01",
"Unit": "IIO"
},
{
- "BriefDescription": "PCI Express bandwidth reading at IIO, part 0",
+ "BriefDescription": "PCI Express bandwidth writing at IIO, part 1",
"Counter": "0,1",
"EventCode": "0x83",
- "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1",
"FCMask": "0x07",
- "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3",
- "MetricName": "LLC_MISSES.PCIE_READ",
"PerPkg": "1",
- "PortMask": "0x01",
+ "PortMask": "0x02",
"ScaleUnit": "4Bytes",
- "UMask": "0x04",
+ "UMask": "0x01",
"Unit": "IIO"
},
{
- "BriefDescription": "PCI Express bandwidth reading at IIO, part 1",
+ "BriefDescription": "PCI Express bandwidth writing at IIO, part 2",
"Counter": "0,1",
"EventCode": "0x83",
- "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2",
"FCMask": "0x07",
"PerPkg": "1",
- "PortMask": "0x02",
+ "PortMask": "0x04",
"ScaleUnit": "4Bytes",
- "UMask": "0x04",
+ "UMask": "0x01",
"Unit": "IIO"
},
{
- "BriefDescription": "PCI Express bandwidth reading at IIO, part 2",
+ "BriefDescription": "PCI Express bandwidth writing at IIO, part 3",
"Counter": "0,1",
"EventCode": "0x83",
- "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3",
"FCMask": "0x07",
"PerPkg": "1",
- "PortMask": "0x04",
+ "PortMask": "0x08",
"ScaleUnit": "4Bytes",
- "UMask": "0x04",
+ "UMask": "0x01",
"Unit": "IIO"
},
{
- "BriefDescription": "PCI Express bandwidth reading at IIO, part 3",
+ "BriefDescription": "PCI Express bandwidth writing at IIO. Derived from unc_iio_data_req_of_cpu.mem_write.part0",
"Counter": "0,1",
"EventCode": "0x83",
- "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3",
+ "EventName": "LLC_MISSES.PCIE_WRITE",
"FCMask": "0x07",
+ "Filter": "ch_mask=0x1f",
+ "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3",
+ "MetricName": "LLC_MISSES.PCIE_WRITE",
"PerPkg": "1",
- "PortMask": "0x08",
+ "PortMask": "0x01",
"ScaleUnit": "4Bytes",
- "UMask": "0x04",
+ "UMask": "0x01",
"Unit": "IIO"
},
{
@@ -313,6 +309,16 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "FaST wire asserted; Horizontal",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xA5",
+ "EventName": "UNC_CHA_FAST_ASSERTED.HORZ",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of cycles either the local or incoming distress signals are asserted. Incoming distress includes up, dn and across.",
+ "UMask": "0x02",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "Read request from a remote socket which hit in the HitMe Cache to a line In the E state",
"Counter": "0,1,2,3",
"EventCode": "0x5F",
@@ -343,6 +349,46 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "Lines Victimized; Lines in E state",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x37",
+ "EventName": "UNC_CHA_LLC_VICTIMS.TOTAL_E",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.",
+ "UMask": "0x02",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Lines Victimized; Lines in F State",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x37",
+ "EventName": "UNC_CHA_LLC_VICTIMS.TOTAL_F",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.",
+ "UMask": "0x08",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Lines Victimized; Lines in M state",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x37",
+ "EventName": "UNC_CHA_LLC_VICTIMS.TOTAL_M",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.",
+ "UMask": "0x01",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Lines Victimized; Lines in S State",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x37",
+ "EventName": "UNC_CHA_LLC_VICTIMS.TOTAL_S",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.",
+ "UMask": "0x04",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "Number of times that an RFO hit in S state.",
"Counter": "0,1,2,3",
"EventCode": "0x39",
@@ -373,6 +419,65 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "Ingress (from CMS) Allocations; IRQ",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x13",
+ "EventName": "UNC_CHA_RxC_INSERTS.IRQ",
+ "PerPkg": "1",
+ "PublicDescription": "Counts number of allocations per cycle into the specified Ingress queue.",
+ "UMask": "0x01",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Ingress (from CMS) Request Queue Rejects; PhyAddr Match",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x19",
+ "EventName": "UNC_CHA_RxC_IRQ1_REJECT.PA_MATCH",
+ "PerPkg": "1",
+ "PublicDescription": "Ingress (from CMS) Request Queue Rejects; PhyAddr Match",
+ "UMask": "0x80",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Ingress (from CMS) Occupancy; IRQ",
+ "EventCode": "0x11",
+ "EventName": "UNC_CHA_RxC_OCCUPANCY.IRQ",
+ "PerPkg": "1",
+ "PublicDescription": "Counts number of entries in the specified Ingress queue in each cycle.",
+ "UMask": "0x01",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Snoop filter capacity evictions for E-state entries.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x3D",
+ "EventName": "UNC_CHA_SF_EVICTION.E_STATE",
+ "PerPkg": "1",
+ "PublicDescription": "Counts snoop filter capacity evictions for entries tracking exclusive lines in the cores cache. Snoop filter capacity evictions occur when the snoop filter is full and evicts an existing entry to track a new entry. Does not count clean evictions such as when a cores cache replaces a tracked cacheline with a new cacheline.",
+ "UMask": "0x02",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Snoop filter capacity evictions for M-state entries.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x3D",
+ "EventName": "UNC_CHA_SF_EVICTION.M_STATE",
+ "PerPkg": "1",
+ "PublicDescription": "Counts snoop filter capacity evictions for entries tracking modified lines in the cores cache. Snoop filter capacity evictions occur when the snoop filter is full and evicts an existing entry to track a new entry. Does not count clean evictions such as when a cores cache replaces a tracked cacheline with a new cacheline.",
+ "UMask": "0x01",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Snoop filter capacity evictions for S-state entries.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x3D",
+ "EventName": "UNC_CHA_SF_EVICTION.S_STATE",
+ "PerPkg": "1",
+ "PublicDescription": "Counts snoop filter capacity evictions for entries tracking shared lines in the cores cache. Snoop filter capacity evictions occur when the snoop filter is full and evicts an existing entry to track a new entry. Does not count clean evictions such as when a cores cache replaces a tracked cacheline with a new cacheline.",
+ "UMask": "0x04",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "RspCnflct* Snoop Responses Received",
"Counter": "0,1,2,3",
"EventCode": "0x5C",
@@ -449,7 +554,7 @@
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x01",
- "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part0. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part0. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
"UMask": "0x04",
"Unit": "IIO"
},
@@ -461,7 +566,7 @@
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x02",
- "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part1. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part1. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
"UMask": "0x04",
"Unit": "IIO"
},
@@ -473,7 +578,7 @@
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x04",
- "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part2. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part2. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
"UMask": "0x04",
"Unit": "IIO"
},
@@ -485,7 +590,7 @@
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x08",
- "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part3. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part3. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
"UMask": "0x04",
"Unit": "IIO"
},
@@ -497,7 +602,7 @@
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x01",
- "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part0 by a unit on the main die (generally a core). In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part0 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
"UMask": "0x01",
"Unit": "IIO"
},
@@ -509,35 +614,227 @@
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x02",
- "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part1 by a unit on the main die (generally a core). In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part1 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
"UMask": "0x01",
"Unit": "IIO"
},
{
- "BriefDescription": "Write request of 4 bytes made to IIO Part2 by the CPU ",
+ "BriefDescription": "Write request of 4 bytes made to IIO Part2 by the CPU",
"Counter": "2,3",
"EventCode": "0xC0",
"EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART2",
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x04",
- "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part2 by a unit on the main die (generally a core). In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part2 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
"UMask": "0x01",
"Unit": "IIO"
},
{
- "BriefDescription": "Write request of 4 bytes made to IIO Part3 by the CPU ",
+ "BriefDescription": "Write request of 4 bytes made to IIO Part3 by the CPU",
"Counter": "2,3",
"EventCode": "0xC0",
"EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART3",
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x08",
- "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part3 by a unit on the main die (generally a core). In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part3 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
"UMask": "0x01",
"Unit": "IIO"
},
{
+ "BriefDescription": "Peer to peer read request for 4 bytes made by a different IIO unit to IIO Part0",
+ "Counter": "2,3",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_READ.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "PublicDescription": "Counts ever peer to peer read request for 4 bytes of data made by a different IIO unit to the MMIO space of a card on IIO Part0. Does not include requests made by the same IIO unit. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request for 4 bytes made by a different IIO unit to IIO Part1",
+ "Counter": "2,3",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_READ.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "PublicDescription": "Counts ever peer to peer read request for 4 bytes of data made by a different IIO unit to the MMIO space of a card on IIO Part1. Does not include requests made by the same IIO unit. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request for 4 bytes made by a different IIO unit to IIO Part2",
+ "Counter": "2,3",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_READ.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "PublicDescription": "Counts ever peer to peer read request for 4 bytes of data made by a different IIO unit to the MMIO space of a card on IIO Part2. Does not include requests made by the same IIO unit. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request for 4 bytes made by a different IIO unit to IIO Part3",
+ "Counter": "2,3",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_READ.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "PublicDescription": "Counts ever peer to peer read request for 4 bytes of data made by a different IIO unit to the MMIO space of a card on IIO Part3. Does not include requests made by the same IIO unit. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of 4 bytes made to IIO Part0 by a different IIO unit",
+ "Counter": "2,3",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_WRITE.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made to the MMIO space of a card on IIO Part0 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of 4 bytes made to IIO Part1 by a different IIO unit",
+ "Counter": "2,3",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_WRITE.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made to the MMIO space of a card on IIO Part1 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of 4 bytes made to IIO Part2 by a different IIO unit",
+ "Counter": "2,3",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_WRITE.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made to the MMIO space of a card on IIO Part2 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of 4 bytes made to IIO Part3 by a different IIO unit",
+ "Counter": "2,3",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_WRITE.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made to the MMIO space of a card on IIO Part3 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request for 4 bytes made by IIO Part0 to an IIO target",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_READ.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "PublicDescription": "Counts every peer to peer read request for 4 bytes of data made by IIO Part0 to the MMIO space of an IIO target. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request for 4 bytes made by IIO Part1 to an IIO target",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_READ.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "PublicDescription": "Counts every peer to peer read request for 4 bytes of data made by IIO Part1 to the MMIO space of an IIO target. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request for 4 bytes made by IIO Part2 to an IIO target",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_READ.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "PublicDescription": "Counts every peer to peer read request for 4 bytes of data made by IIO Part2 to the MMIO space of an IIO target. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request for 4 bytes made by IIO Part3 to an IIO target",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_READ.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "PublicDescription": "Counts every peer to peer read request for 4 bytes of data made by IIO Part3 to the MMIO space of an IIO target. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of 4 bytes made by IIO Part0 to an IIO target",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made by IIO Part0 to the MMIO space of an IIO target. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of 4 bytes made by IIO Part0 to an IIO target",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made by IIO Part1 to the MMIO space of an IIO target. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of 4 bytes made by IIO Part0 to an IIO target",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made by IIO Part2 to the MMIO space of an IIO target. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of 4 bytes made by IIO Part0 to an IIO target",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made by IIO Part3 to the MMIO space of an IIO target. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
"BriefDescription": "Read request for up to a 64 byte transaction is made by the CPU to IIO Part0",
"Counter": "0,1,2,3",
"EventCode": "0xC1",
@@ -545,7 +842,7 @@
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x01",
- "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part0. In the general case, part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part0. In the general case, part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
"UMask": "0x04",
"Unit": "IIO"
},
@@ -557,7 +854,7 @@
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x02",
- "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part1. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part1. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
"UMask": "0x04",
"Unit": "IIO"
},
@@ -569,7 +866,7 @@
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x04",
- "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part2. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part2. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
"UMask": "0x04",
"Unit": "IIO"
},
@@ -581,7 +878,7 @@
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x08",
- "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part3. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part3. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
"UMask": "0x04",
"Unit": "IIO"
},
@@ -593,7 +890,7 @@
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x01",
- "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part0 by a unit on the main die (generally a core). In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part0 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
"UMask": "0x01",
"Unit": "IIO"
},
@@ -605,35 +902,131 @@
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x02",
- "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part1 by a unit on the main die (generally a core). In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part1 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
"UMask": "0x01",
"Unit": "IIO"
},
{
- "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part2 by the CPU ",
+ "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part2 by the CPU",
"Counter": "0,1,2,3",
"EventCode": "0xC1",
"EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART2",
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x04",
- "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part2 by a unit on the main die (generally a core). In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part2 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
"UMask": "0x01",
"Unit": "IIO"
},
{
- "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part3 by the CPU ",
+ "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part3 by the CPU",
"Counter": "0,1,2,3",
"EventCode": "0xC1",
"EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART3",
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x08",
- "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part3 by a unit on the main die (generally a core). In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part3 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
"UMask": "0x01",
"Unit": "IIO"
},
{
+ "BriefDescription": "Peer to peer read request for up to a 64 byte transaction is made by a different IIO unit to IIO Part0",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_READ.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "PublicDescription": "Counts every peer to peer read request for up to a 64 byte transaction of data made by a different IIO unit to the MMIO space of a card on IIO Part0. Does not include requests made by the same IIO unit. In the general case, part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request for up to a 64 byte transaction is made by a different IIO unit to IIO Part1",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_READ.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "PublicDescription": "Counts every peer to peer read request for up to a 64 byte transaction of data made by a different IIO unit to the MMIO space of a card on IIO Part1. Does not include requests made by the same IIO unit. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request for up to a 64 byte transaction is made by a different IIO unit to IIO Part2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_READ.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "PublicDescription": "Counts every peer to peer read request for up to a 64 byte transaction of data made by a different IIO unit to the MMIO space of a card on IIO Part2. Does not include requests made by the same IIO unit. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request for up to a 64 byte transaction is made by a different IIO unit to IIO Part3",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_READ.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "PublicDescription": "Counts every peer to peer read request for up to a 64 byte transaction of data made by a different IIO unit to the MMIO space of a card on IIO Part3. Does not include requests made by the same IIO unit. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made to IIO Part0 by a different IIO unit",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_WRITE.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part0 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made to IIO Part1 by a different IIO unit",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_WRITE.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part1 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made to IIO Part2 by a different IIO unit",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_WRITE.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part2 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made to IIO Part3 by a different IIO unit",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_WRITE.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part3 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
"BriefDescription": "Read request for up to a 64 byte transaction is made by IIO Part0 to Memory",
"Counter": "0,1,2,3",
"EventCode": "0x84",
@@ -730,6 +1123,102 @@
"Unit": "IIO"
},
{
+ "BriefDescription": "Peer to peer read request of up to a 64 byte transaction is made by IIO Part0 to an IIO target",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_READ.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "PublicDescription": "Counts every peer to peer read request of up to a 64 byte transaction made by IIO Part0 to the MMIO space of an IIO target. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request of up to a 64 byte transaction is made by IIO Part1 to an IIO target",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_READ.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "PublicDescription": "Counts every peer to peer read request of up to a 64 byte transaction made by IIO Part1 to the MMIO space of an IIO target. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request of up to a 64 byte transaction is made by IIO Part2 to an IIO target",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_READ.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "PublicDescription": "Counts every peer to peer read request of up to a 64 byte transaction made by IIO Part2 to the MMIO space of an IIO target. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request of up to a 64 byte transaction is made by IIO Part3 to an IIO target",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_READ.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "PublicDescription": "Counts every peer to peer read request of up to a 64 byte transaction made by IIO Part3 to the MMIO space of an IIO target. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made by IIO Part0 to an IIO target",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made by IIO Part0 to the MMIO space of an IIO target. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made by IIO Part1 to an IIO target",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made by IIO Part1 to the MMIO space of an IIO target.In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made by IIO Part2 to an IIO target",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made by IIO Part2 to the MMIO space of an IIO target. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made by IIO Part3 to an IIO target",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made by IIO Part3 to the MMIO space of an IIO target. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
"BriefDescription": "Traffic in which the M2M to iMC Bypass was not taken",
"Counter": "0,1,2,3",
"EventCode": "0x22",
@@ -813,7 +1302,7 @@
"Unit": "M2M"
},
{
- "BriefDescription": "Multi-socket cacheline Directory lookups (cacheline found in A state) ",
+ "BriefDescription": "Multi-socket cacheline Directory lookups (cacheline found in A state)",
"Counter": "0,1,2,3",
"EventCode": "0x2D",
"EventName": "UNC_M2M_DIRECTORY_LOOKUP.STATE_A",
@@ -823,7 +1312,7 @@
"Unit": "M2M"
},
{
- "BriefDescription": "Multi-socket cacheline Directory lookup (cacheline found in I state) ",
+ "BriefDescription": "Multi-socket cacheline Directory lookup (cacheline found in I state)",
"Counter": "0,1,2,3",
"EventCode": "0x2D",
"EventName": "UNC_M2M_DIRECTORY_LOOKUP.STATE_I",
@@ -833,7 +1322,7 @@
"Unit": "M2M"
},
{
- "BriefDescription": "Multi-socket cacheline Directory lookup (cacheline found in S state) ",
+ "BriefDescription": "Multi-socket cacheline Directory lookup (cacheline found in S state)",
"Counter": "0,1,2,3",
"EventCode": "0x2D",
"EventName": "UNC_M2M_DIRECTORY_LOOKUP.STATE_S",
@@ -863,7 +1352,7 @@
"Unit": "M2M"
},
{
- "BriefDescription": "Multi-socket cacheline Directory update from/to Any state ",
+ "BriefDescription": "Multi-socket cacheline Directory update from/to Any state",
"Counter": "0,1,2,3",
"EventCode": "0x2E",
"EventName": "UNC_M2M_DIRECTORY_UPDATE.ANY",
@@ -918,7 +1407,7 @@
"EventCode": "0x37",
"EventName": "UNC_M2M_IMC_READS.ALL",
"PerPkg": "1",
- "PublicDescription": "Counts when the M2M (Mesh to Memory) issues reads to the iMC (Memory Controller). ",
+ "PublicDescription": "Counts when the M2M (Mesh to Memory) issues reads to the iMC (Memory Controller).",
"UMask": "0x4",
"Unit": "M2M"
},
@@ -943,6 +1432,16 @@
"Unit": "M2M"
},
{
+ "BriefDescription": "M2M Writes Issued to iMC; All, regardless of priority.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x38",
+ "EventName": "UNC_M2M_IMC_WRITES.NI",
+ "PerPkg": "1",
+ "PublicDescription": "M2M Writes Issued to iMC; All, regardless of priority.",
+ "UMask": "0x80",
+ "Unit": "M2M"
+ },
+ {
"BriefDescription": "Partial Non-Isochronous writes to the iMC",
"Counter": "0,1,2,3",
"EventCode": "0x38",
@@ -976,12 +1475,77 @@
"EventCode": "0x1",
"EventName": "UNC_M2M_RxC_AD_INSERTS",
"PerPkg": "1",
- "PublicDescription": "Counts when the a new entry is Received(RxC) and then added to the AD (Address Ring) Ingress Queue from the CMS (Common Mesh Stop). This is generally used for reads, and ",
+ "PublicDescription": "Counts when the a new entry is Received(RxC) and then added to the AD (Address Ring) Ingress Queue from the CMS (Common Mesh Stop). This is generally used for reads, and",
"Unit": "M2M"
},
{
- "BriefDescription": "Prefetches generated by the flow control queue of the M3UPI unit.",
+ "BriefDescription": "AD Ingress (from CMS) Occupancy",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2",
+ "EventName": "UNC_M2M_RxC_AD_OCCUPANCY",
+ "PerPkg": "1",
+ "PublicDescription": "AD Ingress (from CMS) Occupancy",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "BL Ingress (from CMS) Allocations",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x5",
+ "EventName": "UNC_M2M_RxC_BL_INSERTS",
+ "PerPkg": "1",
+ "PublicDescription": "BL Ingress (from CMS) Allocations",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "BL Ingress (from CMS) Occupancy",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x6",
+ "EventName": "UNC_M2M_RxC_BL_OCCUPANCY",
+ "PerPkg": "1",
+ "PublicDescription": "BL Ingress (from CMS) Occupancy",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "AD Egress (to CMS) Allocations",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x9",
+ "EventName": "UNC_M2M_TxC_AD_INSERTS",
+ "PerPkg": "1",
+ "PublicDescription": "AD Egress (to CMS) Allocations",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "AD Egress (to CMS) Occupancy",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xA",
+ "EventName": "UNC_M2M_TxC_AD_OCCUPANCY",
+ "PerPkg": "1",
+ "PublicDescription": "AD Egress (to CMS) Occupancy",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "BL Egress (to CMS) Allocations; All",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x15",
+ "EventName": "UNC_M2M_TxC_BL_INSERTS.ALL",
+ "PerPkg": "1",
+ "PublicDescription": "BL Egress (to CMS) Allocations; All",
+ "UMask": "0x03",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "BL Egress (to CMS) Occupancy; All",
"Counter": "0,1,2,3",
+ "EventCode": "0x16",
+ "EventName": "UNC_M2M_TxC_BL_OCCUPANCY.ALL",
+ "PerPkg": "1",
+ "PublicDescription": "BL Egress (to CMS) Occupancy; All",
+ "UMask": "0x03",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Prefetches generated by the flow control queue of the M3UPI unit.",
+ "Counter": "0,1,2",
"EventCode": "0x29",
"EventName": "UNC_M3UPI_UPI_PREFETCH_SPAWN",
"PerPkg": "1",
@@ -1114,23 +1678,23 @@
"Unit": "UPI LL"
},
{
- "BriefDescription": "UPI interconnect send bandwidth for payload. Derived from unc_upi_txl_flits.all_data",
+ "BriefDescription": "Null FLITs transmitted from any slot",
"Counter": "0,1,2,3",
"EventCode": "0x2",
- "EventName": "UPI_DATA_BANDWIDTH_TX",
+ "EventName": "UNC_UPI_TxL_FLITS.ALL_NULL",
"PerPkg": "1",
- "ScaleUnit": "7.11E-06Bytes",
- "UMask": "0x0F",
+ "PublicDescription": "Counts null FLITs (80 bit FLow control unITs) transmitted via any of the 3 Intel Ulra Path Interconnect (UPI) slots on this UPI unit.",
+ "UMask": "0x27",
"Unit": "UPI LL"
},
{
- "BriefDescription": "Null FLITs transmitted from any slot",
+ "BriefDescription": "Valid Flits Sent; Data",
"Counter": "0,1,2,3",
"EventCode": "0x2",
- "EventName": "UNC_UPI_TxL_FLITS.ALL_NULL",
+ "EventName": "UNC_UPI_TxL_FLITS.DATA",
"PerPkg": "1",
- "PublicDescription": "Counts null FLITs (80 bit FLow control unITs) transmitted via any of the 3 Intel Ulra Path Interconnect (UPI) slots on this UPI unit.",
- "UMask": "0x27",
+ "PublicDescription": "Shows legal flit time (hides impact of L0p and L0c).; Count Data Flits (which consume all slots), but how much to count is based on Slot0-2 mask, so count can be 0-3 depending on which slots are enabled for counting..",
+ "UMask": "0x8",
"Unit": "UPI LL"
},
{
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json b/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json
index 7f466c97e485..bbeee1058096 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json
@@ -1,284 +1,284 @@
[
{
- "EventCode": "0x08",
- "UMask": "0x1",
- "BriefDescription": "Load misses in all DTLB levels that cause page walks",
+ "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.",
"Counter": "0,1,2,3",
- "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
- "PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.STLB_HIT",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x20"
},
{
- "EventCode": "0x08",
- "UMask": "0x2",
- "BriefDescription": "Page walk completed due to a demand data load to a 4K page",
+ "BriefDescription": "Store misses in all DTLB levels that cause page walks",
"Counter": "0,1,2,3",
- "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+ "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x08",
- "UMask": "0x4",
- "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page",
+ "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page",
"Counter": "0,1,2,3",
- "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
},
{
- "EventCode": "0x08",
- "UMask": "0x8",
- "BriefDescription": "Page walk completed due to a demand data load to a 1G page",
+ "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.",
"Counter": "0,1,2,3",
- "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
- "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_PENDING",
+ "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10"
},
{
- "EventCode": "0x08",
- "UMask": "0xe",
- "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
"Counter": "0,1,2,3",
- "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts demand data loads that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+ "PublicDescription": "Counts completed page walks (4K page size) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0x08",
- "UMask": "0x10",
- "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
+ "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
"Counter": "0,1,2,3",
- "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
- "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xAE",
+ "EventName": "ITLB.ITLB_FLUSH",
+ "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
},
{
- "EventCode": "0x08",
- "UMask": "0x10",
- "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
+ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.",
"Counter": "0,1,2,3",
- "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_ACTIVE",
+ "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x10"
},
{
- "EventCode": "0x08",
- "UMask": "0x20",
"BriefDescription": "Loads that miss the DTLB and hit the STLB.",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x08",
"EventName": "DTLB_LOAD_MISSES.STLB_HIT",
"PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
- "EventCode": "0x49",
- "UMask": "0x1",
- "BriefDescription": "Store misses in all DTLB levels that cause page walks",
- "Counter": "0,1,2,3",
- "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
- "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x20"
},
{
- "EventCode": "0x49",
- "UMask": "0x2",
- "BriefDescription": "Page walk completed due to a demand data store to a 4K page",
- "Counter": "0,1,2,3",
- "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
- "EventCode": "0x49",
- "UMask": "0x4",
- "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page",
+ "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
"Counter": "0,1,2,3",
- "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0x49",
- "UMask": "0x8",
- "BriefDescription": "Page walk completed due to a demand data store to a 1G page",
- "Counter": "0,1,2,3",
- "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
- "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1G pages. The page walks can end with or without a page fault.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
+ "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10"
},
{
- "EventCode": "0x49",
- "UMask": "0xe",
- "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
+ "BriefDescription": "DTLB flush attempts of the thread-specific entries",
"Counter": "0,1,2,3",
- "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts demand data stores that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xBD",
+ "EventName": "TLB_FLUSH.DTLB_THREAD",
+ "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
},
{
- "EventCode": "0x49",
- "UMask": "0x10",
- "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
+ "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
"Counter": "0,1,2,3",
- "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
- "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
+ "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x10"
},
{
- "EventCode": "0x49",
- "UMask": "0x10",
"BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
"Counter": "0,1,2,3",
- "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
"PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x10"
},
{
- "EventCode": "0x49",
- "UMask": "0x20",
- "BriefDescription": "Stores that miss the DTLB and hit the STLB.",
+ "BriefDescription": "Misses at all ITLB levels that cause page walks",
"Counter": "0,1,2,3",
- "EventName": "DTLB_STORE_MISSES.STLB_HIT",
- "PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+ "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x4F",
- "UMask": "0x10",
- "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.",
+ "BriefDescription": "Stores that miss the DTLB and hit the STLB.",
"Counter": "0,1,2,3",
- "EventName": "EPT.WALK_PENDING",
- "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+ "PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x20"
},
{
- "EventCode": "0x85",
- "UMask": "0x1",
- "BriefDescription": "Misses at all ITLB levels that cause page walks",
+ "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
"Counter": "0,1,2,3",
- "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
- "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Counts demand data stores that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0xe"
},
{
- "EventCode": "0x85",
- "UMask": "0x2",
- "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
+ "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
"Counter": "0,1,2,3",
- "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K page size) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Counts demand data loads that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0xe"
},
{
- "EventCode": "0x85",
- "UMask": "0x4",
- "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
+ "BriefDescription": "Page walk completed due to a demand data store to a 4K page",
"Counter": "0,1,2,3",
- "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts code misses in all ITLB levels that caused a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+ "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0x85",
- "UMask": "0x8",
"BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x85",
"EventName": "ITLB_MISSES.WALK_COMPLETED_1G",
"PublicDescription": "Counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x8"
},
{
- "EventCode": "0x85",
- "UMask": "0xe",
"BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x85",
"EventName": "ITLB_MISSES.WALK_COMPLETED",
"PublicDescription": "Counts completed page walks (2M and 4M page sizes) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0xe"
},
{
- "EventCode": "0x85",
- "UMask": "0x10",
- "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.",
+ "BriefDescription": "Page walk completed due to a demand data load to a 4K page",
"Counter": "0,1,2,3",
- "EventName": "ITLB_MISSES.WALK_PENDING",
- "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+ "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "EventCode": "0x85",
- "UMask": "0x10",
- "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.",
+ "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page",
"Counter": "0,1,2,3",
- "EventName": "ITLB_MISSES.WALK_ACTIVE",
- "CounterMask": "1",
- "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4"
},
{
- "EventCode": "0x85",
- "UMask": "0x20",
- "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.",
+ "BriefDescription": "Load misses in all DTLB levels that cause page walks",
"Counter": "0,1,2,3",
- "EventName": "ITLB_MISSES.STLB_HIT",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+ "PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
- "EventCode": "0xAE",
- "UMask": "0x1",
- "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
- "Counter": "0,1,2,3",
- "EventName": "ITLB.ITLB_FLUSH",
- "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xBD",
- "UMask": "0x1",
- "BriefDescription": "DTLB flush attempts of the thread-specific entries",
+ "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.",
"Counter": "0,1,2,3",
- "EventName": "TLB_FLUSH.DTLB_THREAD",
- "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x4F",
+ "EventName": "EPT.WALK_PENDING",
+ "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10"
},
{
- "EventCode": "0xBD",
- "UMask": "0x20",
"BriefDescription": "STLB flush attempts",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xBD",
"EventName": "TLB_FLUSH.STLB_ANY",
"PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).",
"SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Page walk completed due to a demand data load to a 1G page",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
+ "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Counts code misses in all ITLB levels that caused a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Page walk completed due to a demand data store to a 1G page",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
+ "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1G pages. The page walks can end with or without a page fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8"
}
] \ No newline at end of file
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index fc9c158bfa13..e47644cab3fa 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -48,11 +48,40 @@
#include <linux/list.h>
#include "jsmn.h"
#include "json.h"
-#include "jevents.h"
+#include "pmu-events.h"
int verbose;
char *prog;
+struct json_event {
+ char *name;
+ char *event;
+ char *desc;
+ char *long_desc;
+ char *pmu;
+ char *unit;
+ char *perpkg;
+ char *aggr_mode;
+ char *metric_expr;
+ char *metric_name;
+ char *metric_group;
+ char *deprecated;
+ char *metric_constraint;
+};
+
+enum aggr_mode_class convert(const char *aggr_mode)
+{
+ if (!strcmp(aggr_mode, "PerCore"))
+ return PerCore;
+ else if (!strcmp(aggr_mode, "PerChip"))
+ return PerChip;
+
+ pr_err("%s: Wrong AggregationMode value '%s'\n", prog, aggr_mode);
+ return -1;
+}
+
+typedef int (*func)(void *data, struct json_event *je);
+
int eprintf(int level, int var, const char *fmt, ...)
{
@@ -71,11 +100,6 @@ int eprintf(int level, int var, const char *fmt, ...)
return ret;
}
-__attribute__((weak)) char *get_cpu_str(void)
-{
- return NULL;
-}
-
static void addfield(char *map, char **dst, const char *sep,
const char *a, jsmntok_t *bt)
{
@@ -240,6 +264,7 @@ static struct map {
{ "hisi_sccl,hha", "hisi_sccl,hha" },
{ "hisi_sccl,l3c", "hisi_sccl,l3c" },
{ "L3PMC", "amd_l3" },
+ { "DFPMC", "amd_df" },
{}
};
@@ -318,12 +343,7 @@ static void print_events_table_prefix(FILE *fp, const char *tblname)
close_table = 1;
}
-static int print_events_table_entry(void *data, char *name, char *event,
- char *desc, char *long_desc,
- char *pmu, char *unit, char *perpkg,
- char *metric_expr,
- char *metric_name, char *metric_group,
- char *deprecated, char *metric_constraint)
+static int print_events_table_entry(void *data, struct json_event *je)
{
struct perf_entry_data *pd = data;
FILE *outfp = pd->outfp;
@@ -335,30 +355,32 @@ static int print_events_table_entry(void *data, char *name, char *event,
*/
fprintf(outfp, "{\n");
- if (name)
- fprintf(outfp, "\t.name = \"%s\",\n", name);
- if (event)
- fprintf(outfp, "\t.event = \"%s\",\n", event);
- fprintf(outfp, "\t.desc = \"%s\",\n", desc);
+ if (je->name)
+ fprintf(outfp, "\t.name = \"%s\",\n", je->name);
+ if (je->event)
+ fprintf(outfp, "\t.event = \"%s\",\n", je->event);
+ fprintf(outfp, "\t.desc = \"%s\",\n", je->desc);
fprintf(outfp, "\t.topic = \"%s\",\n", topic);
- if (long_desc && long_desc[0])
- fprintf(outfp, "\t.long_desc = \"%s\",\n", long_desc);
- if (pmu)
- fprintf(outfp, "\t.pmu = \"%s\",\n", pmu);
- if (unit)
- fprintf(outfp, "\t.unit = \"%s\",\n", unit);
- if (perpkg)
- fprintf(outfp, "\t.perpkg = \"%s\",\n", perpkg);
- if (metric_expr)
- fprintf(outfp, "\t.metric_expr = \"%s\",\n", metric_expr);
- if (metric_name)
- fprintf(outfp, "\t.metric_name = \"%s\",\n", metric_name);
- if (metric_group)
- fprintf(outfp, "\t.metric_group = \"%s\",\n", metric_group);
- if (deprecated)
- fprintf(outfp, "\t.deprecated = \"%s\",\n", deprecated);
- if (metric_constraint)
- fprintf(outfp, "\t.metric_constraint = \"%s\",\n", metric_constraint);
+ if (je->long_desc && je->long_desc[0])
+ fprintf(outfp, "\t.long_desc = \"%s\",\n", je->long_desc);
+ if (je->pmu)
+ fprintf(outfp, "\t.pmu = \"%s\",\n", je->pmu);
+ if (je->unit)
+ fprintf(outfp, "\t.unit = \"%s\",\n", je->unit);
+ if (je->perpkg)
+ fprintf(outfp, "\t.perpkg = \"%s\",\n", je->perpkg);
+ if (je->aggr_mode)
+ fprintf(outfp, "\t.aggr_mode = \"%d\",\n", convert(je->aggr_mode));
+ if (je->metric_expr)
+ fprintf(outfp, "\t.metric_expr = \"%s\",\n", je->metric_expr);
+ if (je->metric_name)
+ fprintf(outfp, "\t.metric_name = \"%s\",\n", je->metric_name);
+ if (je->metric_group)
+ fprintf(outfp, "\t.metric_group = \"%s\",\n", je->metric_group);
+ if (je->deprecated)
+ fprintf(outfp, "\t.deprecated = \"%s\",\n", je->deprecated);
+ if (je->metric_constraint)
+ fprintf(outfp, "\t.metric_constraint = \"%s\",\n", je->metric_constraint);
fprintf(outfp, "},\n");
return 0;
@@ -373,6 +395,7 @@ struct event_struct {
char *pmu;
char *unit;
char *perpkg;
+ char *aggr_mode;
char *metric_expr;
char *metric_name;
char *metric_group;
@@ -380,17 +403,17 @@ struct event_struct {
char *metric_constraint;
};
-#define ADD_EVENT_FIELD(field) do { if (field) { \
- es->field = strdup(field); \
+#define ADD_EVENT_FIELD(field) do { if (je->field) { \
+ es->field = strdup(je->field); \
if (!es->field) \
goto out_free; \
} } while (0)
#define FREE_EVENT_FIELD(field) free(es->field)
-#define TRY_FIXUP_FIELD(field) do { if (es->field && !*field) {\
- *field = strdup(es->field); \
- if (!*field) \
+#define TRY_FIXUP_FIELD(field) do { if (es->field && !je->field) {\
+ je->field = strdup(es->field); \
+ if (!je->field) \
return -ENOMEM; \
} } while (0)
@@ -402,6 +425,7 @@ struct event_struct {
op(pmu); \
op(unit); \
op(perpkg); \
+ op(aggr_mode); \
op(metric_expr); \
op(metric_name); \
op(metric_group); \
@@ -421,11 +445,7 @@ static void free_arch_std_events(void)
}
}
-static int save_arch_std_events(void *data, char *name, char *event,
- char *desc, char *long_desc, char *pmu,
- char *unit, char *perpkg, char *metric_expr,
- char *metric_name, char *metric_group,
- char *deprecated, char *metric_constraint)
+static int save_arch_std_events(void *data, struct json_event *je)
{
struct event_struct *es;
@@ -485,23 +505,15 @@ static char *real_event(const char *name, char *event)
}
static int
-try_fixup(const char *fn, char *arch_std, char **event, char **desc,
- char **name, char **long_desc, char **pmu, char **filter,
- char **perpkg, char **unit, char **metric_expr, char **metric_name,
- char **metric_group, unsigned long long eventcode,
- char **deprecated, char **metric_constraint)
+try_fixup(const char *fn, char *arch_std, struct json_event *je, char **event)
{
/* try to find matching event from arch standard values */
struct event_struct *es;
list_for_each_entry(es, &arch_std_events, list) {
if (!strcmp(arch_std, es->name)) {
- if (!eventcode && es->event) {
- /* allow EventCode to be overridden */
- free(*event);
- *event = NULL;
- }
FOR_ALL_EVENT_STRUCT_FIELDS(TRY_FIXUP_FIELD);
+ *event = je->event;
return 0;
}
}
@@ -512,14 +524,9 @@ try_fixup(const char *fn, char *arch_std, char **event, char **desc,
}
/* Call func with each event in the json file */
-int json_events(const char *fn,
- int (*func)(void *data, char *name, char *event, char *desc,
- char *long_desc,
- char *pmu, char *unit, char *perpkg,
- char *metric_expr,
- char *metric_name, char *metric_group,
- char *deprecated, char *metric_constraint),
- void *data)
+static int json_events(const char *fn,
+ int (*func)(void *data, struct json_event *je),
+ void *data)
{
int err;
size_t size;
@@ -537,18 +544,10 @@ int json_events(const char *fn,
EXPECT(tokens->type == JSMN_ARRAY, tokens, "expected top level array");
tok = tokens + 1;
for (i = 0; i < tokens->size; i++) {
- char *event = NULL, *desc = NULL, *name = NULL;
- char *long_desc = NULL;
+ char *event = NULL;
char *extra_desc = NULL;
- char *pmu = NULL;
char *filter = NULL;
- char *perpkg = NULL;
- char *unit = NULL;
- char *metric_expr = NULL;
- char *metric_name = NULL;
- char *metric_group = NULL;
- char *deprecated = NULL;
- char *metric_constraint = NULL;
+ struct json_event je = {};
char *arch_std = NULL;
unsigned long long eventcode = 0;
struct msrmap *msr = NULL;
@@ -583,14 +582,14 @@ int json_events(const char *fn,
eventcode |= strtoul(code, NULL, 0) << 21;
free(code);
} else if (json_streq(map, field, "EventName")) {
- addfield(map, &name, "", "", val);
+ addfield(map, &je.name, "", "", val);
} else if (json_streq(map, field, "BriefDescription")) {
- addfield(map, &desc, "", "", val);
- fixdesc(desc);
+ addfield(map, &je.desc, "", "", val);
+ fixdesc(je.desc);
} else if (json_streq(map, field,
"PublicDescription")) {
- addfield(map, &long_desc, "", "", val);
- fixdesc(long_desc);
+ addfield(map, &je.long_desc, "", "", val);
+ fixdesc(je.long_desc);
} else if (json_streq(map, field, "PEBS") && nz) {
precise = val;
} else if (json_streq(map, field, "MSRIndex") && nz) {
@@ -610,34 +609,36 @@ int json_events(const char *fn,
ppmu = field_to_perf(unit_to_pmu, map, val);
if (ppmu) {
- pmu = strdup(ppmu);
+ je.pmu = strdup(ppmu);
} else {
- if (!pmu)
- pmu = strdup("uncore_");
- addfield(map, &pmu, "", "", val);
- for (s = pmu; *s; s++)
+ if (!je.pmu)
+ je.pmu = strdup("uncore_");
+ addfield(map, &je.pmu, "", "", val);
+ for (s = je.pmu; *s; s++)
*s = tolower(*s);
}
- addfield(map, &desc, ". ", "Unit: ", NULL);
- addfield(map, &desc, "", pmu, NULL);
- addfield(map, &desc, "", " ", NULL);
+ addfield(map, &je.desc, ". ", "Unit: ", NULL);
+ addfield(map, &je.desc, "", je.pmu, NULL);
+ addfield(map, &je.desc, "", " ", NULL);
} else if (json_streq(map, field, "Filter")) {
addfield(map, &filter, "", "", val);
} else if (json_streq(map, field, "ScaleUnit")) {
- addfield(map, &unit, "", "", val);
+ addfield(map, &je.unit, "", "", val);
} else if (json_streq(map, field, "PerPkg")) {
- addfield(map, &perpkg, "", "", val);
+ addfield(map, &je.perpkg, "", "", val);
+ } else if (json_streq(map, field, "AggregationMode")) {
+ addfield(map, &je.aggr_mode, "", "", val);
} else if (json_streq(map, field, "Deprecated")) {
- addfield(map, &deprecated, "", "", val);
+ addfield(map, &je.deprecated, "", "", val);
} else if (json_streq(map, field, "MetricName")) {
- addfield(map, &metric_name, "", "", val);
+ addfield(map, &je.metric_name, "", "", val);
} else if (json_streq(map, field, "MetricGroup")) {
- addfield(map, &metric_group, "", "", val);
+ addfield(map, &je.metric_group, "", "", val);
} else if (json_streq(map, field, "MetricConstraint")) {
- addfield(map, &metric_constraint, "", "", val);
+ addfield(map, &je.metric_constraint, "", "", val);
} else if (json_streq(map, field, "MetricExpr")) {
- addfield(map, &metric_expr, "", "", val);
- for (s = metric_expr; *s; s++)
+ addfield(map, &je.metric_expr, "", "", val);
+ for (s = je.metric_expr; *s; s++)
*s = tolower(*s);
} else if (json_streq(map, field, "ArchStdEvent")) {
addfield(map, &arch_std, "", "", val);
@@ -646,7 +647,7 @@ int json_events(const char *fn,
}
/* ignore unknown fields */
}
- if (precise && desc && !strstr(desc, "(Precise Event)")) {
+ if (precise && je.desc && !strstr(je.desc, "(Precise Event)")) {
if (json_streq(map, precise, "2"))
addfield(map, &extra_desc, " ",
"(Must be precise)", NULL);
@@ -656,48 +657,44 @@ int json_events(const char *fn,
}
snprintf(buf, sizeof buf, "event=%#llx", eventcode);
addfield(map, &event, ",", buf, NULL);
- if (desc && extra_desc)
- addfield(map, &desc, " ", extra_desc, NULL);
- if (long_desc && extra_desc)
- addfield(map, &long_desc, " ", extra_desc, NULL);
+ if (je.desc && extra_desc)
+ addfield(map, &je.desc, " ", extra_desc, NULL);
+ if (je.long_desc && extra_desc)
+ addfield(map, &je.long_desc, " ", extra_desc, NULL);
if (filter)
addfield(map, &event, ",", filter, NULL);
if (msr != NULL)
addfield(map, &event, ",", msr->pname, msrval);
- if (name)
- fixname(name);
+ if (je.name)
+ fixname(je.name);
if (arch_std) {
/*
* An arch standard event is referenced, so try to
* fixup any unassigned values.
*/
- err = try_fixup(fn, arch_std, &event, &desc, &name,
- &long_desc, &pmu, &filter, &perpkg,
- &unit, &metric_expr, &metric_name,
- &metric_group, eventcode,
- &deprecated, &metric_constraint);
+ err = try_fixup(fn, arch_std, &je, &event);
if (err)
goto free_strings;
}
- err = func(data, name, real_event(name, event), desc, long_desc,
- pmu, unit, perpkg, metric_expr, metric_name,
- metric_group, deprecated, metric_constraint);
+ je.event = real_event(je.name, event);
+ err = func(data, &je);
free_strings:
free(event);
- free(desc);
- free(name);
- free(long_desc);
+ free(je.desc);
+ free(je.name);
+ free(je.long_desc);
free(extra_desc);
- free(pmu);
+ free(je.pmu);
free(filter);
- free(perpkg);
- free(deprecated);
- free(unit);
- free(metric_expr);
- free(metric_name);
- free(metric_group);
- free(metric_constraint);
+ free(je.perpkg);
+ free(je.aggr_mode);
+ free(je.deprecated);
+ free(je.unit);
+ free(je.metric_expr);
+ free(je.metric_name);
+ free(je.metric_group);
+ free(je.metric_constraint);
free(arch_std);
if (err)
diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h
deleted file mode 100644
index 2afc8304529e..000000000000
--- a/tools/perf/pmu-events/jevents.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef JEVENTS_H
-#define JEVENTS_H 1
-
-int json_events(const char *fn,
- int (*func)(void *data, char *name, char *event, char *desc,
- char *long_desc,
- char *pmu,
- char *unit, char *perpkg, char *metric_expr,
- char *metric_name, char *metric_group,
- char *deprecated, char *metric_constraint),
- void *data);
-char *get_cpu_str(void);
-
-#ifndef min
-#define min(x, y) ({ \
- typeof(x) _min1 = (x); \
- typeof(y) _min2 = (y); \
- (void) (&_min1 == &_min2); \
- _min1 < _min2 ? _min1 : _min2; })
-#endif
-
-#endif
diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h
index c8f306b572f4..7da1a3743b77 100644
--- a/tools/perf/pmu-events/pmu-events.h
+++ b/tools/perf/pmu-events/pmu-events.h
@@ -2,6 +2,11 @@
#ifndef PMU_EVENTS_H
#define PMU_EVENTS_H
+enum aggr_mode_class {
+ PerChip = 1,
+ PerCore
+};
+
/*
* Describe each PMU event. Each CPU has a table of PMU events.
*/
@@ -14,6 +19,7 @@ struct pmu_event {
const char *pmu;
const char *unit;
const char *perpkg;
+ const char *aggr_mode;
const char *metric_expr;
const char *metric_name;
const char *metric_group;
diff --git a/tools/perf/scripts/python/futex-contention.py b/tools/perf/scripts/python/futex-contention.py
index 0c4841acf75d..7e884d46f920 100644
--- a/tools/perf/scripts/python/futex-contention.py
+++ b/tools/perf/scripts/python/futex-contention.py
@@ -12,41 +12,46 @@
from __future__ import print_function
-import os, sys
-sys.path.append(os.environ['PERF_EXEC_PATH'] + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+import os
+import sys
+sys.path.append(os.environ['PERF_EXEC_PATH'] +
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
from Util import *
process_names = {}
thread_thislock = {}
thread_blocktime = {}
-lock_waits = {} # long-lived stats on (tid,lock) blockage elapsed time
-process_names = {} # long-lived pid-to-execname mapping
+lock_waits = {} # long-lived stats on (tid,lock) blockage elapsed time
+process_names = {} # long-lived pid-to-execname mapping
+
def syscalls__sys_enter_futex(event, ctxt, cpu, s, ns, tid, comm, callchain,
- nr, uaddr, op, val, utime, uaddr2, val3):
- cmd = op & FUTEX_CMD_MASK
- if cmd != FUTEX_WAIT:
- return # we don't care about originators of WAKE events
+ nr, uaddr, op, val, utime, uaddr2, val3):
+ cmd = op & FUTEX_CMD_MASK
+ if cmd != FUTEX_WAIT:
+ return # we don't care about originators of WAKE events
+
+ process_names[tid] = comm
+ thread_thislock[tid] = uaddr
+ thread_blocktime[tid] = nsecs(s, ns)
- process_names[tid] = comm
- thread_thislock[tid] = uaddr
- thread_blocktime[tid] = nsecs(s, ns)
def syscalls__sys_exit_futex(event, ctxt, cpu, s, ns, tid, comm, callchain,
- nr, ret):
- if tid in thread_blocktime:
- elapsed = nsecs(s, ns) - thread_blocktime[tid]
- add_stats(lock_waits, (tid, thread_thislock[tid]), elapsed)
- del thread_blocktime[tid]
- del thread_thislock[tid]
+ nr, ret):
+ if tid in thread_blocktime:
+ elapsed = nsecs(s, ns) - thread_blocktime[tid]
+ add_stats(lock_waits, (tid, thread_thislock[tid]), elapsed)
+ del thread_blocktime[tid]
+ del thread_thislock[tid]
+
def trace_begin():
- print("Press control+C to stop and show the summary")
+ print("Press control+C to stop and show the summary")
-def trace_end():
- for (tid, lock) in lock_waits:
- min, max, avg, count = lock_waits[tid, lock]
- print("%s[%d] lock %x contended %d times, %d avg ns" %
- (process_names[tid], tid, lock, count, avg))
+def trace_end():
+ for (tid, lock) in lock_waits:
+ min, max, avg, count = lock_waits[tid, lock]
+ print("%s[%d] lock %x contended %d times, %d avg ns [max: %d ns, min %d ns]" %
+ (process_names[tid], tid, lock, count, avg, max, min))
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 84352fc49a20..4d15bf6041fb 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -60,6 +60,8 @@ perf-y += api-io.o
perf-y += demangle-java-test.o
perf-y += pfm.o
perf-y += parse-metric.o
+perf-y += pe-file-parsing.o
+perf-y += expand-cgroup.o
$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
$(call rule_mkdir)
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index a9599ab8c471..ec972e0892ab 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -30,9 +30,9 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
-#include "../perf-sys.h"
#include <subcmd/exec-cmd.h>
#include "event.h"
+#include "util.h"
#include "tests.h"
#define ENV "PERF_TEST_ATTR"
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index d328caaba45d..132bdb3e6c31 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -342,6 +342,14 @@ static struct test generic_tests[] = {
.func = test__parse_metric,
},
{
+ .desc = "PE file support",
+ .func = test__pe_file_parsing,
+ },
+ {
+ .desc = "Event expansion for cgroups",
+ .func = test__expand_cgroup_events,
+ },
+ {
.func = NULL,
},
};
diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c
new file mode 100644
index 000000000000..d5771e4d094f
--- /dev/null
+++ b/tools/perf/tests/expand-cgroup.c
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "tests.h"
+#include "debug.h"
+#include "evlist.h"
+#include "cgroup.h"
+#include "rblist.h"
+#include "metricgroup.h"
+#include "parse-events.h"
+#include "pmu-events/pmu-events.h"
+#include "pfm.h"
+#include <subcmd/parse-options.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+static int test_expand_events(struct evlist *evlist,
+ struct rblist *metric_events)
+{
+ int i, ret = TEST_FAIL;
+ int nr_events;
+ bool was_group_event;
+ int nr_members; /* for the first evsel only */
+ const char cgrp_str[] = "A,B,C";
+ const char *cgrp_name[] = { "A", "B", "C" };
+ int nr_cgrps = ARRAY_SIZE(cgrp_name);
+ char **ev_name;
+ struct evsel *evsel;
+
+ TEST_ASSERT_VAL("evlist is empty", !perf_evlist__empty(evlist));
+
+ nr_events = evlist->core.nr_entries;
+ ev_name = calloc(nr_events, sizeof(*ev_name));
+ if (ev_name == NULL) {
+ pr_debug("memory allocation failure\n");
+ return TEST_FAIL;
+ }
+ i = 0;
+ evlist__for_each_entry(evlist, evsel) {
+ ev_name[i] = strdup(evsel->name);
+ if (ev_name[i] == NULL) {
+ pr_debug("memory allocation failure\n");
+ goto out;
+ }
+ i++;
+ }
+ /* remember grouping info */
+ was_group_event = evsel__is_group_event(evlist__first(evlist));
+ nr_members = evlist__first(evlist)->core.nr_members;
+
+ ret = evlist__expand_cgroup(evlist, cgrp_str, metric_events, false);
+ if (ret < 0) {
+ pr_debug("failed to expand events for cgroups\n");
+ goto out;
+ }
+
+ ret = TEST_FAIL;
+ if (evlist->core.nr_entries != nr_events * nr_cgrps) {
+ pr_debug("event count doesn't match\n");
+ goto out;
+ }
+
+ i = 0;
+ evlist__for_each_entry(evlist, evsel) {
+ if (strcmp(evsel->name, ev_name[i % nr_events])) {
+ pr_debug("event name doesn't match:\n");
+ pr_debug(" evsel[%d]: %s\n expected: %s\n",
+ i, evsel->name, ev_name[i % nr_events]);
+ goto out;
+ }
+ if (strcmp(evsel->cgrp->name, cgrp_name[i / nr_events])) {
+ pr_debug("cgroup name doesn't match:\n");
+ pr_debug(" evsel[%d]: %s\n expected: %s\n",
+ i, evsel->cgrp->name, cgrp_name[i / nr_events]);
+ goto out;
+ }
+
+ if ((i % nr_events) == 0) {
+ if (evsel__is_group_event(evsel) != was_group_event) {
+ pr_debug("event group doesn't match: got %s, expect %s\n",
+ evsel__is_group_event(evsel) ? "true" : "false",
+ was_group_event ? "true" : "false");
+ goto out;
+ }
+ if (evsel->core.nr_members != nr_members) {
+ pr_debug("event group member doesn't match: %d vs %d\n",
+ evsel->core.nr_members, nr_members);
+ goto out;
+ }
+ }
+ i++;
+ }
+ ret = TEST_OK;
+
+out: for (i = 0; i < nr_events; i++)
+ free(ev_name[i]);
+ free(ev_name);
+ return ret;
+}
+
+static int expand_default_events(void)
+{
+ int ret;
+ struct evlist *evlist;
+ struct rblist metric_events;
+
+ evlist = perf_evlist__new_default();
+ TEST_ASSERT_VAL("failed to get evlist", evlist);
+
+ rblist__init(&metric_events);
+ ret = test_expand_events(evlist, &metric_events);
+ evlist__delete(evlist);
+ return ret;
+}
+
+static int expand_group_events(void)
+{
+ int ret;
+ struct evlist *evlist;
+ struct rblist metric_events;
+ struct parse_events_error err;
+ const char event_str[] = "{cycles,instructions}";
+
+ symbol_conf.event_group = true;
+
+ evlist = evlist__new();
+ TEST_ASSERT_VAL("failed to get evlist", evlist);
+
+ ret = parse_events(evlist, event_str, &err);
+ if (ret < 0) {
+ pr_debug("failed to parse event '%s', err %d, str '%s'\n",
+ event_str, ret, err.str);
+ parse_events_print_error(&err, event_str);
+ goto out;
+ }
+
+ rblist__init(&metric_events);
+ ret = test_expand_events(evlist, &metric_events);
+out:
+ evlist__delete(evlist);
+ return ret;
+}
+
+static int expand_libpfm_events(void)
+{
+ int ret;
+ struct evlist *evlist;
+ struct rblist metric_events;
+ const char event_str[] = "UNHALTED_CORE_CYCLES";
+ struct option opt = {
+ .value = &evlist,
+ };
+
+ symbol_conf.event_group = true;
+
+ evlist = evlist__new();
+ TEST_ASSERT_VAL("failed to get evlist", evlist);
+
+ ret = parse_libpfm_events_option(&opt, event_str, 0);
+ if (ret < 0) {
+ pr_debug("failed to parse libpfm event '%s', err %d\n",
+ event_str, ret);
+ goto out;
+ }
+ if (perf_evlist__empty(evlist)) {
+ pr_debug("libpfm was not enabled\n");
+ goto out;
+ }
+
+ rblist__init(&metric_events);
+ ret = test_expand_events(evlist, &metric_events);
+out:
+ evlist__delete(evlist);
+ return ret;
+}
+
+static int expand_metric_events(void)
+{
+ int ret;
+ struct evlist *evlist;
+ struct rblist metric_events;
+ const char metric_str[] = "CPI";
+
+ struct pmu_event pme_test[] = {
+ {
+ .metric_expr = "instructions / cycles",
+ .metric_name = "IPC",
+ },
+ {
+ .metric_expr = "1 / IPC",
+ .metric_name = "CPI",
+ },
+ {
+ .metric_expr = NULL,
+ .metric_name = NULL,
+ },
+ };
+ struct pmu_events_map ev_map = {
+ .cpuid = "test",
+ .version = "1",
+ .type = "core",
+ .table = pme_test,
+ };
+
+ evlist = evlist__new();
+ TEST_ASSERT_VAL("failed to get evlist", evlist);
+
+ rblist__init(&metric_events);
+ ret = metricgroup__parse_groups_test(evlist, &ev_map, metric_str,
+ false, false, &metric_events);
+ if (ret < 0) {
+ pr_debug("failed to parse '%s' metric\n", metric_str);
+ goto out;
+ }
+
+ ret = test_expand_events(evlist, &metric_events);
+
+out:
+ metricgroup__rblist_exit(&metric_events);
+ evlist__delete(evlist);
+ return ret;
+}
+
+int test__expand_cgroup_events(struct test *test __maybe_unused,
+ int subtest __maybe_unused)
+{
+ int ret;
+
+ ret = expand_default_events();
+ TEST_ASSERT_EQUAL("failed to expand default events", ret, 0);
+
+ ret = expand_group_events();
+ TEST_ASSERT_EQUAL("failed to expand event group", ret, 0);
+
+ ret = expand_libpfm_events();
+ TEST_ASSERT_EQUAL("failed to expand event group", ret, 0);
+
+ ret = expand_metric_events();
+ TEST_ASSERT_EQUAL("failed to expand metric events", ret, 0);
+
+ return ret;
+}
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index 9b651dfe0a6b..a90fa043c066 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -91,6 +91,7 @@ make_no_sdt := NO_SDT=1
make_no_syscall_tbl := NO_SYSCALL_TABLE=1
make_with_clangllvm := LIBCLANGLLVM=1
make_with_libpfm4 := LIBPFM4=1
+make_with_gtk2 := GTK2=1
make_tags := tags
make_cscope := cscope
make_help := help
@@ -154,6 +155,7 @@ run += make_no_syscall_tbl
run += make_with_babeltrace
run += make_with_clangllvm
run += make_with_libpfm4
+run += make_with_gtk2
run += make_help
run += make_doc
run += make_perf_o
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index aae0fd9045c1..611512f22b34 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -557,6 +557,7 @@ static int test__checkevent_pmu_events(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
+ TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive);
return 0;
}
@@ -575,6 +576,7 @@ static int test__checkevent_pmu_events_mix(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
+ TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive);
/* cpu/pmu-event/u*/
evsel = evsel__next(evsel);
@@ -587,6 +589,7 @@ static int test__checkevent_pmu_events_mix(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
+ TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.pinned);
return 0;
}
@@ -1277,6 +1280,49 @@ static int test__pinned_group(struct evlist *evlist)
return 0;
}
+static int test__checkevent_exclusive_modifier(struct evlist *evlist)
+{
+ struct evsel *evsel = evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong exclusive", evsel->core.attr.exclusive);
+
+ return test__checkevent_symbolic_name(evlist);
+}
+
+static int test__exclusive_group(struct evlist *evlist)
+{
+ struct evsel *evsel, *leader;
+
+ TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->core.nr_entries);
+
+ /* cycles - group leader */
+ evsel = leader = evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong exclusive", evsel->core.attr.exclusive);
+
+ /* cache-misses - can not be pinned, but will go on with the leader */
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive);
+
+ /* branch-misses - ditto */
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive);
+
+ return 0;
+}
static int test__checkevent_breakpoint_len(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
@@ -1765,7 +1811,17 @@ static struct evlist_test test__events[] = {
.name = "cycles:k",
.check = test__sym_event_dc,
.id = 55,
- }
+ },
+ {
+ .name = "instructions:uep",
+ .check = test__checkevent_exclusive_modifier,
+ .id = 56,
+ },
+ {
+ .name = "{cycles,cache-misses,branch-misses}:e",
+ .check = test__exclusive_group,
+ .id = 57,
+ },
};
static struct evlist_test test__events_pmu[] = {
diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c
index cd7331aac3bd..7c1bde01cb50 100644
--- a/tools/perf/tests/parse-metric.c
+++ b/tools/perf/tests/parse-metric.c
@@ -11,8 +11,6 @@
#include "debug.h"
#include "expr.h"
#include "stat.h"
-#include <perf/cpumap.h>
-#include <perf/evlist.h>
static struct pmu_event pme_test[] = {
{
@@ -159,6 +157,7 @@ static int __compute_metric(const char *name, struct value *vals,
}
perf_evlist__set_maps(&evlist->core, cpus, NULL);
+ runtime_stat__init(&st);
/* Parse the metric into metric_events list. */
err = metricgroup__parse_groups_test(evlist, &map, name,
@@ -172,7 +171,6 @@ static int __compute_metric(const char *name, struct value *vals,
goto out;
/* Load the runtime stats with given numbers for events. */
- runtime_stat__init(&st);
load_runtime_stat(&st, evlist, vals);
/* And execute the metric */
diff --git a/tools/perf/tests/pe-file-parsing.c b/tools/perf/tests/pe-file-parsing.c
new file mode 100644
index 000000000000..58b90c42eb38
--- /dev/null
+++ b/tools/perf/tests/pe-file-parsing.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdbool.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <subcmd/exec-cmd.h>
+
+#include "debug.h"
+#include "util/build-id.h"
+#include "util/symbol.h"
+#include "util/dso.h"
+
+#include "tests.h"
+
+#ifdef HAVE_LIBBFD_SUPPORT
+
+static int run_dir(const char *d)
+{
+ char filename[PATH_MAX];
+ char debugfile[PATH_MAX];
+ struct build_id bid;
+ char debuglink[PATH_MAX];
+ char expect_build_id[] = {
+ 0x5a, 0x0f, 0xd8, 0x82, 0xb5, 0x30, 0x84, 0x22,
+ 0x4b, 0xa4, 0x7b, 0x62, 0x4c, 0x55, 0xa4, 0x69,
+ };
+ char expect_debuglink[PATH_MAX] = "pe-file.exe.debug";
+ struct dso *dso;
+ struct symbol *sym;
+ int ret;
+
+ scnprintf(filename, PATH_MAX, "%s/pe-file.exe", d);
+ ret = filename__read_build_id(filename, &bid);
+ TEST_ASSERT_VAL("Failed to read build_id",
+ ret == sizeof(expect_build_id));
+ TEST_ASSERT_VAL("Wrong build_id", !memcmp(bid.data, expect_build_id,
+ sizeof(expect_build_id)));
+
+ ret = filename__read_debuglink(filename, debuglink, PATH_MAX);
+ TEST_ASSERT_VAL("Failed to read debuglink", ret == 0);
+ TEST_ASSERT_VAL("Wrong debuglink",
+ !strcmp(debuglink, expect_debuglink));
+
+ scnprintf(debugfile, PATH_MAX, "%s/%s", d, debuglink);
+ ret = filename__read_build_id(debugfile, &bid);
+ TEST_ASSERT_VAL("Failed to read debug file build_id",
+ ret == sizeof(expect_build_id));
+ TEST_ASSERT_VAL("Wrong build_id", !memcmp(bid.data, expect_build_id,
+ sizeof(expect_build_id)));
+
+ dso = dso__new(filename);
+ TEST_ASSERT_VAL("Failed to get dso", dso);
+
+ ret = dso__load_bfd_symbols(dso, debugfile);
+ TEST_ASSERT_VAL("Failed to load symbols", ret == 0);
+
+ dso__sort_by_name(dso);
+ sym = dso__find_symbol_by_name(dso, "main");
+ TEST_ASSERT_VAL("Failed to find main", sym);
+ dso__delete(dso);
+
+ return TEST_OK;
+}
+
+int test__pe_file_parsing(struct test *test __maybe_unused,
+ int subtest __maybe_unused)
+{
+ struct stat st;
+ char path_dir[PATH_MAX];
+
+ /* First try development tree tests. */
+ if (!lstat("./tests", &st))
+ return run_dir("./tests");
+
+ /* Then installed path. */
+ snprintf(path_dir, PATH_MAX, "%s/tests", get_argv_exec_path());
+
+ if (!lstat(path_dir, &st))
+ return run_dir(path_dir);
+
+ return TEST_SKIP;
+}
+
+#else
+
+int test__pe_file_parsing(struct test *test __maybe_unused,
+ int subtest __maybe_unused)
+{
+ return TEST_SKIP;
+}
+
+#endif
diff --git a/tools/perf/tests/pe-file.c b/tools/perf/tests/pe-file.c
new file mode 100644
index 000000000000..eb3df5e9886f
--- /dev/null
+++ b/tools/perf/tests/pe-file.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// pe-file.exe and pe-file.exe.debug built with;
+// x86_64-w64-mingw32-gcc -o pe-file.exe pe-file.c
+// -Wl,--file-alignment,4096 -Wl,--build-id
+// x86_64-w64-mingw32-objcopy --only-keep-debug
+// --compress-debug-sections pe-file.exe pe-file.exe.debug
+// x86_64-w64-mingw32-objcopy --strip-debug
+// --add-gnu-debuglink=pe-file.exe.debug pe-file.exe
+
+int main(int argc, char const *argv[])
+{
+ return 0;
+}
diff --git a/tools/perf/tests/pe-file.exe b/tools/perf/tests/pe-file.exe
new file mode 100644
index 000000000000..838a46dae724
--- /dev/null
+++ b/tools/perf/tests/pe-file.exe
Binary files differ
diff --git a/tools/perf/tests/pe-file.exe.debug b/tools/perf/tests/pe-file.exe.debug
new file mode 100644
index 000000000000..287d6718d6c9
--- /dev/null
+++ b/tools/perf/tests/pe-file.exe.debug
Binary files differ
diff --git a/tools/perf/tests/python-use.c b/tools/perf/tests/python-use.c
index 40ab72149ce1..98c6d474aa6f 100644
--- a/tools/perf/tests/python-use.c
+++ b/tools/perf/tests/python-use.c
@@ -18,6 +18,7 @@ int test__python_use(struct test *test __maybe_unused, int subtest __maybe_unuse
PYTHONPATH, PYTHON, verbose > 0 ? "" : "2> /dev/null") < 0)
return -1;
+ pr_debug("python usage test: \"%s\"\n", cmd);
ret = system(cmd) ? -1 : 0;
free(cmd);
return ret;
diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c
index 60f0e9ee04fb..ed76c693f65e 100644
--- a/tools/perf/tests/sdt.c
+++ b/tools/perf/tests/sdt.c
@@ -28,16 +28,16 @@ static int target_function(void)
static int build_id_cache__add_file(const char *filename)
{
char sbuild_id[SBUILD_ID_SIZE];
- u8 build_id[BUILD_ID_SIZE];
+ struct build_id bid;
int err;
- err = filename__read_build_id(filename, &build_id, sizeof(build_id));
+ err = filename__read_build_id(filename, &bid);
if (err < 0) {
pr_debug("Failed to read build id of %s\n", filename);
return err;
}
- build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+ build_id__sprintf(&bid, sbuild_id);
err = build_id_cache__add_s(sbuild_id, filename, NULL, false, false);
if (err < 0)
pr_debug("Failed to add build id cache of %s\n", filename);
diff --git a/tools/perf/tests/shell/buildid.sh b/tools/perf/tests/shell/buildid.sh
new file mode 100755
index 000000000000..4861a20edee2
--- /dev/null
+++ b/tools/perf/tests/shell/buildid.sh
@@ -0,0 +1,101 @@
+#!/bin/sh
+# build id cache operations
+# SPDX-License-Identifier: GPL-2.0
+
+# skip if there's no readelf
+if ! [ -x "$(command -v readelf)" ]; then
+ echo "failed: no readelf, install binutils"
+ exit 2
+fi
+
+# skip if there's no compiler
+if ! [ -x "$(command -v cc)" ]; then
+ echo "failed: no compiler, install gcc"
+ exit 2
+fi
+
+ex_md5=$(mktemp /tmp/perf.ex.MD5.XXX)
+ex_sha1=$(mktemp /tmp/perf.ex.SHA1.XXX)
+
+echo 'int main(void) { return 0; }' | cc -Wl,--build-id=sha1 -o ${ex_sha1} -x c -
+echo 'int main(void) { return 0; }' | cc -Wl,--build-id=md5 -o ${ex_md5} -x c -
+
+echo "test binaries: ${ex_sha1} ${ex_md5}"
+
+check()
+{
+ id=`readelf -n ${1} 2>/dev/null | grep 'Build ID' | awk '{print $3}'`
+
+ echo "build id: ${id}"
+
+ link=${build_id_dir}/.build-id/${id:0:2}/${id:2}
+ echo "link: ${link}"
+
+ if [ ! -h $link ]; then
+ echo "failed: link ${link} does not exist"
+ exit 1
+ fi
+
+ file=${build_id_dir}/.build-id/${id:0:2}/`readlink ${link}`/elf
+ echo "file: ${file}"
+
+ if [ ! -x $file ]; then
+ echo "failed: file ${file} does not exist"
+ exit 1
+ fi
+
+ diff ${file} ${1}
+ if [ $? -ne 0 ]; then
+ echo "failed: ${file} do not match"
+ exit 1
+ fi
+
+ echo "OK for ${1}"
+}
+
+test_add()
+{
+ build_id_dir=$(mktemp -d /tmp/perf.debug.XXX)
+ perf="perf --buildid-dir ${build_id_dir}"
+
+ ${perf} buildid-cache -v -a ${1}
+ if [ $? -ne 0 ]; then
+ echo "failed: add ${1} to build id cache"
+ exit 1
+ fi
+
+ check ${1}
+
+ rm -rf ${build_id_dir}
+}
+
+test_record()
+{
+ data=$(mktemp /tmp/perf.data.XXX)
+ build_id_dir=$(mktemp -d /tmp/perf.debug.XXX)
+ perf="perf --buildid-dir ${build_id_dir}"
+
+ ${perf} record --buildid-all -o ${data} ${1}
+ if [ $? -ne 0 ]; then
+ echo "failed: record ${1}"
+ exit 1
+ fi
+
+ check ${1}
+
+ rm -rf ${build_id_dir}
+ rm -rf ${data}
+}
+
+# add binaries manual via perf buildid-cache -a
+test_add ${ex_sha1}
+test_add ${ex_md5}
+
+# add binaries via perf record post processing
+test_record ${ex_sha1}
+test_record ${ex_md5}
+
+# cleanup
+rm ${ex_sha1} ${ex_md5}
+
+exit ${err}
diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh
new file mode 100755
index 000000000000..8d84fdbed6a6
--- /dev/null
+++ b/tools/perf/tests/shell/test_arm_coresight.sh
@@ -0,0 +1,183 @@
+#!/bin/sh
+# Check Arm CoreSight trace data recording and synthesized samples
+
+# Uses the 'perf record' to record trace data with Arm CoreSight sinks;
+# then verify if there have any branch samples and instruction samples
+# are generated by CoreSight with 'perf script' and 'perf report'
+# commands.
+
+# SPDX-License-Identifier: GPL-2.0
+# Leo Yan <leo.yan@linaro.org>, 2020
+
+perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
+file=$(mktemp /tmp/temporary_file.XXXXX)
+
+skip_if_no_cs_etm_event() {
+ perf list | grep -q 'cs_etm//' && return 0
+
+ # cs_etm event doesn't exist
+ return 2
+}
+
+skip_if_no_cs_etm_event || exit 2
+
+cleanup_files()
+{
+ rm -f ${perfdata}
+ rm -f ${file}
+}
+
+trap cleanup_files exit
+
+record_touch_file() {
+ echo "Recording trace (only user mode) with path: CPU$2 => $1"
+ rm -f $file
+ perf record -o ${perfdata} -e cs_etm/@$1/u --per-thread \
+ -- taskset -c $2 touch $file
+}
+
+perf_script_branch_samples() {
+ echo "Looking at perf.data file for dumping branch samples:"
+
+ # Below is an example of the branch samples dumping:
+ # touch 6512 1 branches:u: ffffb220824c strcmp+0xc (/lib/aarch64-linux-gnu/ld-2.27.so)
+ # touch 6512 1 branches:u: ffffb22082e0 strcmp+0xa0 (/lib/aarch64-linux-gnu/ld-2.27.so)
+ # touch 6512 1 branches:u: ffffb2208320 strcmp+0xe0 (/lib/aarch64-linux-gnu/ld-2.27.so)
+ perf script -F,-time -i ${perfdata} | \
+ egrep " +$1 +[0-9]+ .* +branches:([u|k]:)? +"
+}
+
+perf_report_branch_samples() {
+ echo "Looking at perf.data file for reporting branch samples:"
+
+ # Below is an example of the branch samples reporting:
+ # 73.04% 73.04% touch libc-2.27.so [.] _dl_addr
+ # 7.71% 7.71% touch libc-2.27.so [.] getenv
+ # 2.59% 2.59% touch ld-2.27.so [.] strcmp
+ perf report --stdio -i ${perfdata} | \
+ egrep " +[0-9]+\.[0-9]+% +[0-9]+\.[0-9]+% +$1 "
+}
+
+perf_report_instruction_samples() {
+ echo "Looking at perf.data file for instruction samples:"
+
+ # Below is an example of the instruction samples reporting:
+ # 68.12% touch libc-2.27.so [.] _dl_addr
+ # 5.80% touch libc-2.27.so [.] getenv
+ # 4.35% touch ld-2.27.so [.] _dl_fixup
+ perf report --itrace=i1000i --stdio -i ${perfdata} | \
+ egrep " +[0-9]+\.[0-9]+% +$1"
+}
+
+is_device_sink() {
+ # If the node of "enable_sink" is existed under the device path, this
+ # means the device is a sink device. Need to exclude 'tpiu' since it
+ # cannot support perf PMU.
+ echo "$1" | egrep -q -v "tpiu"
+
+ if [ $? -eq 0 -a -e "$1/enable_sink" ]; then
+
+ pmu_dev="/sys/bus/event_source/devices/cs_etm/sinks/$2"
+
+ # Warn if the device is not supported by PMU
+ if ! [ -f $pmu_dev ]; then
+ echo "PMU doesn't support $pmu_dev"
+ fi
+
+ return 0
+ fi
+
+ # Otherwise, it's not a sink device
+ return 1
+}
+
+arm_cs_iterate_devices() {
+ for dev in $1/connections/out\:*; do
+
+ # Skip testing if it's not a directory
+ ! [ -d $dev ] && continue;
+
+ # Read out its symbol link file name
+ path=`readlink -f $dev`
+
+ # Extract device name from path, e.g.
+ # path = '/sys/devices/platform/20010000.etf/tmc_etf0'
+ # `> device_name = 'tmc_etf0'
+ device_name=$(basename $path)
+
+ if is_device_sink $path $devce_name; then
+
+ record_touch_file $device_name $2 &&
+ perf_script_branch_samples touch &&
+ perf_report_branch_samples touch &&
+ perf_report_instruction_samples touch
+
+ err=$?
+
+ # Exit when find failure
+ [ $err != 0 ] && exit $err
+ fi
+
+ arm_cs_iterate_devices $dev $2
+ done
+}
+
+arm_cs_etm_traverse_path_test() {
+ # Iterate for every ETM device
+ for dev in /sys/bus/coresight/devices/etm*; do
+
+ # Find the ETM device belonging to which CPU
+ cpu=`cat $dev/cpu`
+
+ echo $dev
+ echo $cpu
+
+ # Use depth-first search (DFS) to iterate outputs
+ arm_cs_iterate_devices $dev $cpu
+ done
+}
+
+arm_cs_etm_system_wide_test() {
+ echo "Recording trace with system wide mode"
+ perf record -o ${perfdata} -e cs_etm// -a -- ls
+
+ perf_script_branch_samples perf &&
+ perf_report_branch_samples perf &&
+ perf_report_instruction_samples perf
+
+ err=$?
+
+ # Exit when find failure
+ [ $err != 0 ] && exit $err
+}
+
+arm_cs_etm_snapshot_test() {
+ echo "Recording trace with snapshot mode"
+ perf record -o ${perfdata} -e cs_etm// -S \
+ -- dd if=/dev/zero of=/dev/null &
+ PERFPID=$!
+
+ # Wait for perf program
+ sleep 1
+
+ # Send signal to snapshot trace data
+ kill -USR2 $PERFPID
+
+ # Stop perf program
+ kill $PERFPID
+ wait $PERFPID
+
+ perf_script_branch_samples dd &&
+ perf_report_branch_samples dd &&
+ perf_report_instruction_samples dd
+
+ err=$?
+
+ # Exit when find failure
+ [ $err != 0 ] && exit $err
+}
+
+arm_cs_etm_traverse_path_test
+arm_cs_etm_system_wide_test
+arm_cs_etm_snapshot_test
+exit 0
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 4447a516c689..c85a2c08e407 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -122,6 +122,8 @@ int test__pfm(struct test *test, int subtest);
const char *test__pfm_subtest_get_desc(int subtest);
int test__pfm_subtest_get_nr(void);
int test__parse_metric(struct test *test, int subtest);
+int test__pe_file_parsing(struct test *test, int subtest);
+int test__expand_cgroup_events(struct test *test, int subtest);
bool test__bp_signal_is_supported(void);
bool test__bp_account_is_supported(void);
diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c
index 862c8331dded..3c5e97b93dd5 100644
--- a/tools/perf/trace/beauty/mmap.c
+++ b/tools/perf/trace/beauty/mmap.c
@@ -1,40 +1,28 @@
// SPDX-License-Identifier: LGPL-2.1
-#include <uapi/linux/mman.h>
#include <linux/log2.h>
-static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
- struct syscall_arg *arg)
+#include "trace/beauty/generated/mmap_prot_array.c"
+static DEFINE_STRARRAY(mmap_prot, "PROT_");
+
+static size_t mmap__scnprintf_prot(unsigned long prot, char *bf, size_t size, bool show_prefix)
+{
+ return strarray__scnprintf_flags(&strarray__mmap_prot, bf, size, show_prefix, prot);
+}
+
+static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, struct syscall_arg *arg)
{
- const char *prot_prefix = "PROT_";
- int printed = 0, prot = arg->val;
- bool show_prefix = arg->show_string_prefix;
-
- if (prot == PROT_NONE)
- return scnprintf(bf, size, "%sNONE", show_prefix ? prot_prefix : "");
-#define P_MMAP_PROT(n) \
- if (prot & PROT_##n) { \
- printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prot_prefix :"", #n); \
- prot &= ~PROT_##n; \
- }
-
- P_MMAP_PROT(READ);
- P_MMAP_PROT(WRITE);
- P_MMAP_PROT(EXEC);
- P_MMAP_PROT(SEM);
- P_MMAP_PROT(GROWSDOWN);
- P_MMAP_PROT(GROWSUP);
-#undef P_MMAP_PROT
-
- if (prot)
- printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
-
- return printed;
+ unsigned long prot = arg->val;
+
+ if (prot == 0)
+ return scnprintf(bf, size, "%sNONE", arg->show_string_prefix ? strarray__mmap_prot.prefix : "");
+
+ return mmap__scnprintf_prot(prot, bf, size, arg->show_string_prefix);
}
#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
#include "trace/beauty/generated/mmap_flags_array.c"
- static DEFINE_STRARRAY(mmap_flags, "MAP_");
+static DEFINE_STRARRAY(mmap_flags, "MAP_");
static size_t mmap__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix)
{
@@ -54,28 +42,22 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
-static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
- struct syscall_arg *arg)
-{
- const char *flags_prefix = "MREMAP_";
- bool show_prefix = arg->show_string_prefix;
- int printed = 0, flags = arg->val;
+#include "trace/beauty/generated/mremap_flags_array.c"
+static DEFINE_STRARRAY(mremap_flags, "MREMAP_");
-#define P_MREMAP_FLAG(n) \
- if (flags & MREMAP_##n) { \
- printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? flags_prefix : "", #n); \
- flags &= ~MREMAP_##n; \
- }
+static size_t mremap__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix)
+{
+ return strarray__scnprintf_flags(&strarray__mremap_flags, bf, size, show_prefix, flags);
+}
- P_MREMAP_FLAG(MAYMOVE);
- P_MREMAP_FLAG(FIXED);
- P_MREMAP_FLAG(DONTUNMAP);
-#undef P_MREMAP_FLAG
+static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+ unsigned long flags = arg->val;
- if (flags)
- printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+ if (!(flags & MREMAP_FIXED))
+ arg->mask |= (1 << 5); /* Mask 5th ('new_address') args, ignored */
- return printed;
+ return mremap__scnprintf_flags(flags, bf, size, arg->show_string_prefix);
}
#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
diff --git a/tools/perf/trace/beauty/mmap_flags.sh b/tools/perf/trace/beauty/mmap_flags.sh
index 5f5eefcb3c74..39eb2595983b 100755
--- a/tools/perf/trace/beauty/mmap_flags.sh
+++ b/tools/perf/trace/beauty/mmap_flags.sh
@@ -21,20 +21,20 @@ printf "static const char *mmap_flags[] = {\n"
regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MAP_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
egrep -q $regex ${arch_mman} && \
(egrep $regex ${arch_mman} | \
- sed -r "s/$regex/\2 \1/g" | \
- xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+ sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \
+ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n")
egrep -q $regex ${linux_mman} && \
(egrep $regex ${linux_mman} | \
egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \
- sed -r "s/$regex/\2 \1/g" | \
- xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+ sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \
+ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n")
([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.*' ${arch_mman}) &&
(egrep $regex ${header_dir}/mman-common.h | \
egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \
- sed -r "s/$regex/\2 \1/g" | \
- xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+ sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \
+ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n")
([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.h>.*' ${arch_mman}) &&
(egrep $regex ${header_dir}/mman.h | \
- sed -r "s/$regex/\2 \1/g" | \
- xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+ sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \
+ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n")
printf "};\n"
diff --git a/tools/perf/trace/beauty/mmap_prot.sh b/tools/perf/trace/beauty/mmap_prot.sh
new file mode 100755
index 000000000000..28f638f8d216
--- /dev/null
+++ b/tools/perf/trace/beauty/mmap_prot.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+if [ $# -ne 2 ] ; then
+ [ $# -eq 1 ] && hostarch=$1 || hostarch=`uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/`
+ asm_header_dir=tools/include/uapi/asm-generic
+ arch_header_dir=tools/arch/${hostarch}/include/uapi/asm
+else
+ asm_header_dir=$1
+ arch_header_dir=$2
+fi
+
+common_mman=${asm_header_dir}/mman-common.h
+arch_mman=${arch_header_dir}/mman.h
+
+prefix="PROT"
+
+printf "static const char *mmap_prot[] = {\n"
+regex=`printf '^[[:space:]]*#[[:space:]]*define[[:space:]]+%s_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' ${prefix}`
+([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.*' ${arch_mman}) &&
+(egrep $regex ${common_mman} | \
+ egrep -vw PROT_NONE | \
+ sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \
+ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef ${prefix}_%s\n#define ${prefix}_%s %s\n#endif\n")
+[ -f ${arch_mman} ] && egrep -q $regex ${arch_mman} &&
+(egrep $regex ${arch_mman} | \
+ egrep -vw PROT_NONE | \
+ sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \
+ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef ${prefix}_%s\n#define ${prefix}_%s %s\n#endif\n")
+printf "};\n"
diff --git a/tools/perf/trace/beauty/mremap_flags.sh b/tools/perf/trace/beauty/mremap_flags.sh
new file mode 100755
index 000000000000..d58182300bb1
--- /dev/null
+++ b/tools/perf/trace/beauty/mremap_flags.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+if [ $# -ne 1 ] ; then
+ linux_header_dir=tools/include/uapi/linux
+else
+ linux_header_dir=$1
+fi
+
+linux_mman=${linux_header_dir}/mman.h
+
+printf "static const char *mremap_flags[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MREMAP_([[:alnum:]_]+)[[:space:]]+((0x)?[[:xdigit:]]+)[[:space:]]*.*'
+egrep -q $regex ${linux_mman} && \
+(egrep $regex ${linux_mman} | \
+ sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \
+ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MREMAP_%s\n#define MREMAP_%s %s\n#endif\n")
+printf "};\n"
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index cd5e41960e64..e2563d0154eb 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -101,6 +101,8 @@ perf-y += call-path.o
perf-y += rwsem.o
perf-y += thread-stack.o
perf-y += spark.o
+perf-y += topdown.o
+perf-y += stream.o
perf-$(CONFIG_AUXTRACE) += auxtrace.o
perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
perf-$(CONFIG_AUXTRACE) += intel-pt.o
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 0a1fcf787538..6c8575e182ed 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1578,8 +1578,7 @@ int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, s
char *build_id_msg = NULL;
if (dso->has_build_id) {
- build_id__sprintf(dso->build_id,
- sizeof(dso->build_id), bf + 15);
+ build_id__sprintf(&dso->bid, bf + 15);
build_id_msg = bf;
}
scnprintf(buf, buflen,
@@ -3127,6 +3126,8 @@ static int annotation__config(const char *var, const char *value, void *data)
value);
} else if (!strcmp(var, "annotate.use_offset")) {
opt->use_offset = perf_config_bool("use_offset", value);
+ } else if (!strcmp(var, "annotate.disassembler_style")) {
+ opt->disassembler_style = value;
} else {
pr_debug("%s variable unknown, ignoring...", var);
}
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 31207b6e2066..8763772f1095 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -37,6 +37,7 @@
#include <linux/ctype.h>
#include <linux/zalloc.h>
+#include <asm/bug.h>
static bool no_buildid_cache;
@@ -95,13 +96,13 @@ struct perf_tool build_id__mark_dso_hit_ops = {
.ordered_events = true,
};
-int build_id__sprintf(const u8 *build_id, int len, char *bf)
+int build_id__sprintf(const struct build_id *build_id, char *bf)
{
char *bid = bf;
- const u8 *raw = build_id;
- int i;
+ const u8 *raw = build_id->data;
+ size_t i;
- for (i = 0; i < len; ++i) {
+ for (i = 0; i < build_id->size; ++i) {
sprintf(bid, "%02x", *raw);
++raw;
bid += 2;
@@ -113,7 +114,7 @@ int build_id__sprintf(const u8 *build_id, int len, char *bf)
int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id)
{
char notes[PATH_MAX];
- u8 build_id[BUILD_ID_SIZE];
+ struct build_id bid;
int ret;
if (!root_dir)
@@ -121,25 +122,23 @@ int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id)
scnprintf(notes, sizeof(notes), "%s/sys/kernel/notes", root_dir);
- ret = sysfs__read_build_id(notes, build_id, sizeof(build_id));
+ ret = sysfs__read_build_id(notes, &bid);
if (ret < 0)
return ret;
- return build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+ return build_id__sprintf(&bid, sbuild_id);
}
int filename__sprintf_build_id(const char *pathname, char *sbuild_id)
{
- u8 build_id[BUILD_ID_SIZE];
+ struct build_id bid;
int ret;
- ret = filename__read_build_id(pathname, build_id, sizeof(build_id));
+ ret = filename__read_build_id(pathname, &bid);
if (ret < 0)
return ret;
- else if (ret != sizeof(build_id))
- return -EINVAL;
- return build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+ return build_id__sprintf(&bid, sbuild_id);
}
/* asnprintf consolidates asprintf and snprintf */
@@ -272,7 +271,7 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
if (!dso->has_build_id)
return NULL;
- build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+ build_id__sprintf(&dso->bid, sbuild_id);
linkname = build_id_cache__linkname(sbuild_id, NULL, 0);
if (!linkname)
return NULL;
@@ -297,7 +296,7 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
continue; \
else
-static int write_buildid(const char *name, size_t name_len, u8 *build_id,
+static int write_buildid(const char *name, size_t name_len, struct build_id *bid,
pid_t pid, u16 misc, struct feat_fd *fd)
{
int err;
@@ -308,7 +307,9 @@ static int write_buildid(const char *name, size_t name_len, u8 *build_id,
len = PERF_ALIGN(len, NAME_ALIGN);
memset(&b, 0, sizeof(b));
- memcpy(&b.build_id, build_id, BUILD_ID_SIZE);
+ memcpy(&b.data, bid->data, bid->size);
+ b.size = (u8) bid->size;
+ misc |= PERF_RECORD_MISC_BUILD_ID_SIZE;
b.pid = pid;
b.header.misc = misc;
b.header.size = sizeof(b) + len;
@@ -355,7 +356,7 @@ static int machine__write_buildid_table(struct machine *machine,
in_kernel = pos->kernel ||
is_kernel_module(name,
PERF_RECORD_MISC_CPUMODE_UNKNOWN);
- err = write_buildid(name, name_len, pos->build_id, machine->pid,
+ err = write_buildid(name, name_len, &pos->bid, machine->pid,
in_kernel ? kmisc : umisc, fd);
if (err)
break;
@@ -769,13 +770,13 @@ out_free:
return err;
}
-static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size,
+static int build_id_cache__add_b(const struct build_id *bid,
const char *name, struct nsinfo *nsi,
bool is_kallsyms, bool is_vdso)
{
char sbuild_id[SBUILD_ID_SIZE];
- build_id__sprintf(build_id, build_id_size, sbuild_id);
+ build_id__sprintf(bid, sbuild_id);
return build_id_cache__add_s(sbuild_id, name, nsi, is_kallsyms,
is_vdso);
@@ -841,8 +842,8 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine)
is_kallsyms = true;
name = machine->mmap_name;
}
- return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name,
- dso->nsinfo, is_kallsyms, is_vdso);
+ return build_id_cache__add_b(&dso->bid, name, dso->nsinfo,
+ is_kallsyms, is_vdso);
}
static int __dsos__cache_build_ids(struct list_head *head,
@@ -902,3 +903,10 @@ bool perf_session__read_build_ids(struct perf_session *session, bool with_hits)
return ret;
}
+
+void build_id__init(struct build_id *bid, const u8 *data, size_t size)
+{
+ WARN_ON(size > BUILD_ID_SIZE);
+ memcpy(bid->data, data, size);
+ bid->size = size;
+}
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index aad419bb165c..f293f99d5dba 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -8,13 +8,19 @@
#include "tool.h"
#include <linux/types.h>
+struct build_id {
+ u8 data[BUILD_ID_SIZE];
+ size_t size;
+};
+
struct nsinfo;
extern struct perf_tool build_id__mark_dso_hit_ops;
struct dso;
struct feat_fd;
-int build_id__sprintf(const u8 *build_id, int len, char *bf);
+void build_id__init(struct build_id *bid, const u8 *data, size_t size);
+int build_id__sprintf(const struct build_id *build_id, char *bf);
int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id);
int filename__sprintf_build_id(const char *pathname, char *sbuild_id);
char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf,
@@ -29,6 +35,10 @@ int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event,
int dsos__hit_all(struct perf_session *session);
+int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event,
+ struct perf_sample *sample, struct evsel *evsel,
+ struct machine *machine);
+
bool perf_session__read_build_ids(struct perf_session *session, bool with_hits);
int perf_session__write_buildid_table(struct perf_session *session,
struct feat_fd *fd);
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 2775b752f2fa..1b60985690bb 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -1613,3 +1613,102 @@ void callchain_param_setup(u64 sample_type)
callchain_param.record_mode = CALLCHAIN_FP;
}
}
+
+static bool chain_match(struct callchain_list *base_chain,
+ struct callchain_list *pair_chain)
+{
+ enum match_result match;
+
+ match = match_chain_strings(base_chain->srcline,
+ pair_chain->srcline);
+ if (match != MATCH_ERROR)
+ return match == MATCH_EQ;
+
+ match = match_chain_dso_addresses(base_chain->ms.map,
+ base_chain->ip,
+ pair_chain->ms.map,
+ pair_chain->ip);
+
+ return match == MATCH_EQ;
+}
+
+bool callchain_cnode_matched(struct callchain_node *base_cnode,
+ struct callchain_node *pair_cnode)
+{
+ struct callchain_list *base_chain, *pair_chain;
+ bool match = false;
+
+ pair_chain = list_first_entry(&pair_cnode->val,
+ struct callchain_list,
+ list);
+
+ list_for_each_entry(base_chain, &base_cnode->val, list) {
+ if (&pair_chain->list == &pair_cnode->val)
+ return false;
+
+ if (!base_chain->srcline || !pair_chain->srcline) {
+ pair_chain = list_next_entry(pair_chain, list);
+ continue;
+ }
+
+ match = chain_match(base_chain, pair_chain);
+ if (!match)
+ return false;
+
+ pair_chain = list_next_entry(pair_chain, list);
+ }
+
+ /*
+ * Say chain1 is ABC, chain2 is ABCD, we consider they are
+ * not fully matched.
+ */
+ if (pair_chain && (&pair_chain->list != &pair_cnode->val))
+ return false;
+
+ return match;
+}
+
+static u64 count_callchain_hits(struct hist_entry *he)
+{
+ struct rb_root *root = &he->sorted_chain;
+ struct rb_node *rb_node = rb_first(root);
+ struct callchain_node *node;
+ u64 chain_hits = 0;
+
+ while (rb_node) {
+ node = rb_entry(rb_node, struct callchain_node, rb_node);
+ chain_hits += node->hit;
+ rb_node = rb_next(rb_node);
+ }
+
+ return chain_hits;
+}
+
+u64 callchain_total_hits(struct hists *hists)
+{
+ struct rb_node *next = rb_first_cached(&hists->entries);
+ u64 chain_hits = 0;
+
+ while (next) {
+ struct hist_entry *he = rb_entry(next, struct hist_entry,
+ rb_node);
+
+ chain_hits += count_callchain_hits(he);
+ next = rb_next(&he->rb_node);
+ }
+
+ return chain_hits;
+}
+
+s64 callchain_avg_cycles(struct callchain_node *cnode)
+{
+ struct callchain_list *chain;
+ s64 cycles = 0;
+
+ list_for_each_entry(chain, &cnode->val, list) {
+ if (chain->srcline && chain->branch_count)
+ cycles += chain->cycles_count / chain->branch_count;
+ }
+
+ return cycles;
+}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index fe36a9e5ccd1..5824134f983b 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -13,6 +13,7 @@ struct ip_callchain;
struct map;
struct perf_sample;
struct thread;
+struct hists;
#define HELP_PAD "\t\t\t\t"
@@ -298,4 +299,12 @@ int callchain_branch_counts(struct callchain_root *root,
u64 *abort_count, u64 *cycles_count);
void callchain_param_setup(u64 sample_type);
+
+bool callchain_cnode_matched(struct callchain_node *base_cnode,
+ struct callchain_node *pair_cnode);
+
+u64 callchain_total_hits(struct hists *hists);
+
+s64 callchain_avg_cycles(struct callchain_node *cnode);
+
#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index 050dea9f1e88..b81324a13a2b 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -3,6 +3,9 @@
#include "evsel.h"
#include "cgroup.h"
#include "evlist.h"
+#include "rblist.h"
+#include "metricgroup.h"
+#include "stat.h"
#include <linux/zalloc.h>
#include <sys/types.h>
#include <sys/stat.h>
@@ -48,7 +51,7 @@ static struct cgroup *evlist__find_cgroup(struct evlist *evlist, const char *str
return NULL;
}
-static struct cgroup *cgroup__new(const char *name)
+static struct cgroup *cgroup__new(const char *name, bool do_open)
{
struct cgroup *cgroup = zalloc(sizeof(*cgroup));
@@ -58,9 +61,14 @@ static struct cgroup *cgroup__new(const char *name)
cgroup->name = strdup(name);
if (!cgroup->name)
goto out_err;
- cgroup->fd = open_cgroup(name);
- if (cgroup->fd == -1)
- goto out_free_name;
+
+ if (do_open) {
+ cgroup->fd = open_cgroup(name);
+ if (cgroup->fd == -1)
+ goto out_free_name;
+ } else {
+ cgroup->fd = -1;
+ }
}
return cgroup;
@@ -76,7 +84,7 @@ struct cgroup *evlist__findnew_cgroup(struct evlist *evlist, const char *name)
{
struct cgroup *cgroup = evlist__find_cgroup(evlist, name);
- return cgroup ?: cgroup__new(name);
+ return cgroup ?: cgroup__new(name, true);
}
static int add_cgroup(struct evlist *evlist, const char *str)
@@ -193,6 +201,103 @@ int parse_cgroups(const struct option *opt, const char *str,
return 0;
}
+int evlist__expand_cgroup(struct evlist *evlist, const char *str,
+ struct rblist *metric_events, bool open_cgroup)
+{
+ struct evlist *orig_list, *tmp_list;
+ struct evsel *pos, *evsel, *leader;
+ struct rblist orig_metric_events;
+ struct cgroup *cgrp = NULL;
+ const char *p, *e, *eos = str + strlen(str);
+ int ret = -1;
+
+ if (evlist->core.nr_entries == 0) {
+ fprintf(stderr, "must define events before cgroups\n");
+ return -EINVAL;
+ }
+
+ orig_list = evlist__new();
+ tmp_list = evlist__new();
+ if (orig_list == NULL || tmp_list == NULL) {
+ fprintf(stderr, "memory allocation failed\n");
+ return -ENOMEM;
+ }
+
+ /* save original events and init evlist */
+ perf_evlist__splice_list_tail(orig_list, &evlist->core.entries);
+ evlist->core.nr_entries = 0;
+
+ if (metric_events) {
+ orig_metric_events = *metric_events;
+ rblist__init(metric_events);
+ } else {
+ rblist__init(&orig_metric_events);
+ }
+
+ for (;;) {
+ p = strchr(str, ',');
+ e = p ? p : eos;
+
+ /* allow empty cgroups, i.e., skip */
+ if (e - str) {
+ /* termination added */
+ char *name = strndup(str, e - str);
+ if (!name)
+ goto out_err;
+
+ cgrp = cgroup__new(name, open_cgroup);
+ free(name);
+ if (cgrp == NULL)
+ goto out_err;
+ } else {
+ cgrp = NULL;
+ }
+
+ leader = NULL;
+ evlist__for_each_entry(orig_list, pos) {
+ evsel = evsel__clone(pos);
+ if (evsel == NULL)
+ goto out_err;
+
+ cgroup__put(evsel->cgrp);
+ evsel->cgrp = cgroup__get(cgrp);
+
+ if (evsel__is_group_leader(pos))
+ leader = evsel;
+ evsel->leader = leader;
+
+ evlist__add(tmp_list, evsel);
+ }
+ /* cgroup__new() has a refcount, release it here */
+ cgroup__put(cgrp);
+ nr_cgroups++;
+
+ if (metric_events) {
+ perf_stat__collect_metric_expr(tmp_list);
+ if (metricgroup__copy_metric_events(tmp_list, cgrp,
+ metric_events,
+ &orig_metric_events) < 0)
+ break;
+ }
+
+ perf_evlist__splice_list_tail(evlist, &tmp_list->core.entries);
+ tmp_list->core.nr_entries = 0;
+
+ if (!p) {
+ ret = 0;
+ break;
+ }
+ str = p+1;
+ }
+
+out_err:
+ evlist__delete(orig_list);
+ evlist__delete(tmp_list);
+ rblist__exit(&orig_metric_events);
+
+ return ret;
+}
+
static struct cgroup *__cgroup__findnew(struct rb_root *root, uint64_t id,
bool create, const char *path)
{
diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h
index e98d5975fe55..162906f3412a 100644
--- a/tools/perf/util/cgroup.h
+++ b/tools/perf/util/cgroup.h
@@ -22,8 +22,11 @@ struct cgroup *cgroup__get(struct cgroup *cgroup);
void cgroup__put(struct cgroup *cgroup);
struct evlist;
+struct rblist;
struct cgroup *evlist__findnew_cgroup(struct evlist *evlist, const char *name);
+int evlist__expand_cgroup(struct evlist *evlist, const char *cgroups,
+ struct rblist *metric_events, bool open_cgroup);
void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup);
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 20be0504fb95..6969f82843ee 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -489,7 +489,7 @@ int perf_default_config(const char *var, const char *value,
return 0;
}
-static int perf_config_from_file(config_fn_t fn, const char *filename, void *data)
+int perf_config_from_file(config_fn_t fn, const char *filename, void *data)
{
int ret;
FILE *f = fopen(filename, "r");
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
index c10b66dde2f3..8c881e3a3ec3 100644
--- a/tools/perf/util/config.h
+++ b/tools/perf/util/config.h
@@ -26,6 +26,8 @@ struct perf_config_set {
extern const char *config_exclusive_filename;
typedef int (*config_fn_t)(const char *, const char *, void *);
+
+int perf_config_from_file(config_fn_t fn, const char *filename, void *data);
int perf_default_config(const char *, const char *, void *);
int perf_config(config_fn_t fn, void *);
int perf_config_int(int *dest, const char *, const char *);
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 5a3b4755f0b3..55c11e854fe4 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -172,9 +172,7 @@ int dso__read_binary_type_filename(const struct dso *dso,
break;
}
- build_id__sprintf(dso->build_id,
- sizeof(dso->build_id),
- build_id_hex);
+ build_id__sprintf(&dso->bid, build_id_hex);
len = __symbol__join_symfs(filename, size, "/usr/lib/debug/.build-id/");
snprintf(filename + len, size - len, "%.2s/%s.debug",
build_id_hex, build_id_hex + 2);
@@ -1328,15 +1326,16 @@ void dso__put(struct dso *dso)
dso__delete(dso);
}
-void dso__set_build_id(struct dso *dso, void *build_id)
+void dso__set_build_id(struct dso *dso, struct build_id *bid)
{
- memcpy(dso->build_id, build_id, sizeof(dso->build_id));
+ dso->bid = *bid;
dso->has_build_id = 1;
}
-bool dso__build_id_equal(const struct dso *dso, u8 *build_id)
+bool dso__build_id_equal(const struct dso *dso, struct build_id *bid)
{
- return memcmp(dso->build_id, build_id, sizeof(dso->build_id)) == 0;
+ return dso->bid.size == bid->size &&
+ memcmp(dso->bid.data, bid->data, dso->bid.size) == 0;
}
void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine)
@@ -1346,8 +1345,7 @@ void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine)
if (machine__is_default_guest(machine))
return;
sprintf(path, "%s/sys/kernel/notes", machine->root_dir);
- if (sysfs__read_build_id(path, dso->build_id,
- sizeof(dso->build_id)) == 0)
+ if (sysfs__read_build_id(path, &dso->bid) == 0)
dso->has_build_id = true;
}
@@ -1365,18 +1363,17 @@ int dso__kernel_module_get_build_id(struct dso *dso,
"%s/sys/module/%.*s/notes/.note.gnu.build-id",
root_dir, (int)strlen(name) - 1, name);
- if (sysfs__read_build_id(filename, dso->build_id,
- sizeof(dso->build_id)) == 0)
+ if (sysfs__read_build_id(filename, &dso->bid) == 0)
dso->has_build_id = true;
return 0;
}
-size_t dso__fprintf_buildid(struct dso *dso, FILE *fp)
+static size_t dso__fprintf_buildid(struct dso *dso, FILE *fp)
{
char sbuild_id[SBUILD_ID_SIZE];
- build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+ build_id__sprintf(&dso->bid, sbuild_id);
return fprintf(fp, "%s", sbuild_id);
}
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 8ad17f395a19..d8cb4f5680a4 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -176,7 +176,7 @@ struct dso {
bool sorted_by_name;
bool loaded;
u8 rel;
- u8 build_id[BUILD_ID_SIZE];
+ struct build_id bid;
u64 text_offset;
const char *short_name;
const char *long_name;
@@ -260,8 +260,8 @@ bool dso__sorted_by_name(const struct dso *dso);
void dso__set_sorted_by_name(struct dso *dso);
void dso__sort_by_name(struct dso *dso);
-void dso__set_build_id(struct dso *dso, void *build_id);
-bool dso__build_id_equal(const struct dso *dso, u8 *build_id);
+void dso__set_build_id(struct dso *dso, struct build_id *bid);
+bool dso__build_id_equal(const struct dso *dso, struct build_id *bid);
void dso__read_running_kernel_build_id(struct dso *dso,
struct machine *machine);
int dso__kernel_module_get_build_id(struct dso *dso, const char *root_dir);
@@ -362,7 +362,6 @@ struct dso *machine__findnew_kernel(struct machine *machine, const char *name,
void dso__reset_find_symbol_cache(struct dso *dso);
-size_t dso__fprintf_buildid(struct dso *dso, FILE *fp);
size_t dso__fprintf_symbols_by_name(struct dso *dso, FILE *fp);
size_t dso__fprintf(struct dso *dso, FILE *fp);
diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c
index 939471731ea6..183a81d5b2f9 100644
--- a/tools/perf/util/dsos.c
+++ b/tools/perf/util/dsos.c
@@ -73,8 +73,7 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
continue;
}
nsinfo__mountns_enter(pos->nsinfo, &nsc);
- if (filename__read_build_id(pos->long_name, pos->build_id,
- sizeof(pos->build_id)) > 0) {
+ if (filename__read_build_id(pos->long_name, &pos->bid) > 0) {
have_build_id = true;
pos->has_build_id = true;
}
@@ -288,10 +287,12 @@ size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
size_t ret = 0;
list_for_each_entry(pos, head, node) {
+ char sbuild_id[SBUILD_ID_SIZE];
+
if (skip && skip(pos, parm))
continue;
- ret += dso__fprintf_buildid(pos, fp);
- ret += fprintf(fp, " %s\n", pos->long_name);
+ build_id__sprintf(&pos->bid, sbuild_id);
+ ret += fprintf(fp, "%-40s %s\n", sbuild_id, pos->long_name);
}
return ret;
}
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 317a26571845..05616d4138a9 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -398,7 +398,7 @@ size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp)
if (event->header.type == PERF_RECORD_SWITCH)
return fprintf(fp, " %s\n", in_out);
- return fprintf(fp, " %s %s pid/tid: %5u/%-5u\n",
+ return fprintf(fp, " %s %s pid/tid: %5d/%-5d\n",
in_out, out ? "next" : "prev",
event->context_switch.next_prev_pid,
event->context_switch.next_prev_tid);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index c0768c61eb43..8bdf3d2c907c 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1732,6 +1732,91 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
return leader;
}
+static int evlist__parse_control_fifo(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close)
+{
+ char *s, *p;
+ int ret = 0, fd;
+
+ if (strncmp(str, "fifo:", 5))
+ return -EINVAL;
+
+ str += 5;
+ if (!*str || *str == ',')
+ return -EINVAL;
+
+ s = strdup(str);
+ if (!s)
+ return -ENOMEM;
+
+ p = strchr(s, ',');
+ if (p)
+ *p = '\0';
+
+ /*
+ * O_RDWR avoids POLLHUPs which is necessary to allow the other
+ * end of a FIFO to be repeatedly opened and closed.
+ */
+ fd = open(s, O_RDWR | O_NONBLOCK | O_CLOEXEC);
+ if (fd < 0) {
+ pr_err("Failed to open '%s'\n", s);
+ ret = -errno;
+ goto out_free;
+ }
+ *ctl_fd = fd;
+ *ctl_fd_close = true;
+
+ if (p && *++p) {
+ /* O_RDWR | O_NONBLOCK means the other end need not be open */
+ fd = open(p, O_RDWR | O_NONBLOCK | O_CLOEXEC);
+ if (fd < 0) {
+ pr_err("Failed to open '%s'\n", p);
+ ret = -errno;
+ goto out_free;
+ }
+ *ctl_fd_ack = fd;
+ }
+
+out_free:
+ free(s);
+ return ret;
+}
+
+int evlist__parse_control(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close)
+{
+ char *comma = NULL, *endptr = NULL;
+
+ *ctl_fd_close = false;
+
+ if (strncmp(str, "fd:", 3))
+ return evlist__parse_control_fifo(str, ctl_fd, ctl_fd_ack, ctl_fd_close);
+
+ *ctl_fd = strtoul(&str[3], &endptr, 0);
+ if (endptr == &str[3])
+ return -EINVAL;
+
+ comma = strchr(str, ',');
+ if (comma) {
+ if (endptr != comma)
+ return -EINVAL;
+
+ *ctl_fd_ack = strtoul(comma + 1, &endptr, 0);
+ if (endptr == comma + 1 || *endptr != '\0')
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+void evlist__close_control(int ctl_fd, int ctl_fd_ack, bool *ctl_fd_close)
+{
+ if (*ctl_fd_close) {
+ *ctl_fd_close = false;
+ close(ctl_fd);
+ if (ctl_fd_ack >= 0)
+ close(ctl_fd_ack);
+ }
+}
+
int evlist__initialize_ctlfd(struct evlist *evlist, int fd, int ack)
{
if (fd == -1) {
@@ -1783,6 +1868,7 @@ static int evlist__ctlfd_recv(struct evlist *evlist, enum evlist_ctl_cmd *cmd,
char c;
size_t bytes_read = 0;
+ *cmd = EVLIST_CTL_CMD_UNSUPPORTED;
memset(cmd_data, 0, data_size);
data_size--;
@@ -1794,30 +1880,39 @@ static int evlist__ctlfd_recv(struct evlist *evlist, enum evlist_ctl_cmd *cmd,
cmd_data[bytes_read++] = c;
if (bytes_read == data_size)
break;
- } else {
- if (err == -1)
+ continue;
+ } else if (err == -1) {
+ if (errno == EINTR)
+ continue;
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ err = 0;
+ else
pr_err("Failed to read from ctlfd %d: %m\n", evlist->ctl_fd.fd);
- break;
}
+ break;
} while (1);
pr_debug("Message from ctl_fd: \"%s%s\"\n", cmd_data,
bytes_read == data_size ? "" : c == '\n' ? "\\n" : "\\0");
- if (err > 0) {
+ if (bytes_read > 0) {
if (!strncmp(cmd_data, EVLIST_CTL_CMD_ENABLE_TAG,
(sizeof(EVLIST_CTL_CMD_ENABLE_TAG)-1))) {
*cmd = EVLIST_CTL_CMD_ENABLE;
} else if (!strncmp(cmd_data, EVLIST_CTL_CMD_DISABLE_TAG,
(sizeof(EVLIST_CTL_CMD_DISABLE_TAG)-1))) {
*cmd = EVLIST_CTL_CMD_DISABLE;
+ } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_SNAPSHOT_TAG,
+ (sizeof(EVLIST_CTL_CMD_SNAPSHOT_TAG)-1))) {
+ *cmd = EVLIST_CTL_CMD_SNAPSHOT;
+ pr_debug("is snapshot\n");
}
}
- return err;
+ return bytes_read ? (int)bytes_read : err;
}
-static int evlist__ctlfd_ack(struct evlist *evlist)
+int evlist__ctlfd_ack(struct evlist *evlist)
{
int err;
@@ -1853,13 +1948,16 @@ int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd)
case EVLIST_CTL_CMD_DISABLE:
evlist__disable(evlist);
break;
+ case EVLIST_CTL_CMD_SNAPSHOT:
+ break;
case EVLIST_CTL_CMD_ACK:
case EVLIST_CTL_CMD_UNSUPPORTED:
default:
pr_debug("ctlfd: unsupported %d\n", *cmd);
break;
}
- if (!(*cmd == EVLIST_CTL_CMD_ACK || *cmd == EVLIST_CTL_CMD_UNSUPPORTED))
+ if (!(*cmd == EVLIST_CTL_CMD_ACK || *cmd == EVLIST_CTL_CMD_UNSUPPORTED ||
+ *cmd == EVLIST_CTL_CMD_SNAPSHOT))
evlist__ctlfd_ack(evlist);
}
}
@@ -1871,3 +1969,14 @@ int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd)
return err;
}
+
+struct evsel *evlist__find_evsel(struct evlist *evlist, int idx)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->idx == idx)
+ return evsel;
+ }
+ return NULL;
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index c73f7f7f120b..e1a450322bc5 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -363,6 +363,7 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evlist,
#define EVLIST_CTL_CMD_ENABLE_TAG "enable"
#define EVLIST_CTL_CMD_DISABLE_TAG "disable"
#define EVLIST_CTL_CMD_ACK_TAG "ack\n"
+#define EVLIST_CTL_CMD_SNAPSHOT_TAG "snapshot"
#define EVLIST_CTL_CMD_MAX_LEN 64
@@ -370,15 +371,20 @@ enum evlist_ctl_cmd {
EVLIST_CTL_CMD_UNSUPPORTED = 0,
EVLIST_CTL_CMD_ENABLE,
EVLIST_CTL_CMD_DISABLE,
- EVLIST_CTL_CMD_ACK
+ EVLIST_CTL_CMD_ACK,
+ EVLIST_CTL_CMD_SNAPSHOT,
};
+int evlist__parse_control(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close);
+void evlist__close_control(int ctl_fd, int ctl_fd_ack, bool *ctl_fd_close);
int evlist__initialize_ctlfd(struct evlist *evlist, int ctl_fd, int ctl_fd_ack);
int evlist__finalize_ctlfd(struct evlist *evlist);
bool evlist__ctlfd_initialized(struct evlist *evlist);
int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd);
+int evlist__ctlfd_ack(struct evlist *evlist);
#define EVLIST_ENABLED_MSG "Events enabled\n"
#define EVLIST_DISABLED_MSG "Events disabled\n"
+struct evsel *evlist__find_evsel(struct evlist *evlist, int idx);
#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 459b51e90063..1cad6051d8b0 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -331,6 +331,110 @@ error_free:
goto out;
}
+static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src)
+{
+ struct evsel_config_term *pos, *tmp;
+
+ list_for_each_entry(pos, &src->config_terms, list) {
+ tmp = malloc(sizeof(*tmp));
+ if (tmp == NULL)
+ return -ENOMEM;
+
+ *tmp = *pos;
+ if (tmp->free_str) {
+ tmp->val.str = strdup(pos->val.str);
+ if (tmp->val.str == NULL) {
+ free(tmp);
+ return -ENOMEM;
+ }
+ }
+ list_add_tail(&tmp->list, &dst->config_terms);
+ }
+ return 0;
+}
+
+/**
+ * evsel__clone - create a new evsel copied from @orig
+ * @orig: original evsel
+ *
+ * The assumption is that @orig is not configured nor opened yet.
+ * So we only care about the attributes that can be set while it's parsed.
+ */
+struct evsel *evsel__clone(struct evsel *orig)
+{
+ struct evsel *evsel;
+
+ BUG_ON(orig->core.fd);
+ BUG_ON(orig->counts);
+ BUG_ON(orig->priv);
+ BUG_ON(orig->per_pkg_mask);
+
+ /* cannot handle BPF objects for now */
+ if (orig->bpf_obj)
+ return NULL;
+
+ evsel = evsel__new(&orig->core.attr);
+ if (evsel == NULL)
+ return NULL;
+
+ evsel->core.cpus = perf_cpu_map__get(orig->core.cpus);
+ evsel->core.own_cpus = perf_cpu_map__get(orig->core.own_cpus);
+ evsel->core.threads = perf_thread_map__get(orig->core.threads);
+ evsel->core.nr_members = orig->core.nr_members;
+ evsel->core.system_wide = orig->core.system_wide;
+
+ if (orig->name) {
+ evsel->name = strdup(orig->name);
+ if (evsel->name == NULL)
+ goto out_err;
+ }
+ if (orig->group_name) {
+ evsel->group_name = strdup(orig->group_name);
+ if (evsel->group_name == NULL)
+ goto out_err;
+ }
+ if (orig->pmu_name) {
+ evsel->pmu_name = strdup(orig->pmu_name);
+ if (evsel->pmu_name == NULL)
+ goto out_err;
+ }
+ if (orig->filter) {
+ evsel->filter = strdup(orig->filter);
+ if (evsel->filter == NULL)
+ goto out_err;
+ }
+ evsel->cgrp = cgroup__get(orig->cgrp);
+ evsel->tp_format = orig->tp_format;
+ evsel->handler = orig->handler;
+ evsel->leader = orig->leader;
+
+ evsel->max_events = orig->max_events;
+ evsel->tool_event = orig->tool_event;
+ evsel->unit = orig->unit;
+ evsel->scale = orig->scale;
+ evsel->snapshot = orig->snapshot;
+ evsel->per_pkg = orig->per_pkg;
+ evsel->percore = orig->percore;
+ evsel->precise_max = orig->precise_max;
+ evsel->use_uncore_alias = orig->use_uncore_alias;
+ evsel->is_libpfm_event = orig->is_libpfm_event;
+
+ evsel->exclude_GH = orig->exclude_GH;
+ evsel->sample_read = orig->sample_read;
+ evsel->auto_merge_stats = orig->auto_merge_stats;
+ evsel->collect_stat = orig->collect_stat;
+ evsel->weak_group = orig->weak_group;
+
+ if (evsel__copy_config_terms(evsel, orig) < 0)
+ goto out_err;
+
+ return evsel;
+
+out_err:
+ evsel__delete(evsel);
+ return NULL;
+}
+
/*
* Returns pointer with encoded error via <linux/err.h> interface.
*/
@@ -1684,6 +1788,11 @@ retry_open:
FD(evsel, cpu, thread) = fd;
+ if (unlikely(test_attr__enabled)) {
+ test_attr__open(&evsel->core.attr, pid, cpus->map[cpu],
+ fd, group_fd, flags);
+ }
+
if (fd < 0) {
err = -errno;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 35e3f6d66085..79a860d8e3ee 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -42,65 +42,79 @@ enum perf_tool_event {
*/
struct evsel {
struct perf_evsel core;
- struct evlist *evlist;
- char *filter;
+ struct evlist *evlist;
+ off_t id_offset;
+ int idx;
+ int id_pos;
+ int is_pos;
+ unsigned int sample_size;
+
+ /*
+ * These fields can be set in the parse-events code or similar.
+ * Please check evsel__clone() to copy them properly so that
+ * they can be released properly.
+ */
+ struct {
+ char *name;
+ char *group_name;
+ const char *pmu_name;
+ struct tep_event *tp_format;
+ char *filter;
+ unsigned long max_events;
+ double scale;
+ const char *unit;
+ struct cgroup *cgrp;
+ enum perf_tool_event tool_event;
+ /* parse modifier helper */
+ int exclude_GH;
+ int sample_read;
+ bool snapshot;
+ bool per_pkg;
+ bool percore;
+ bool precise_max;
+ bool use_uncore_alias;
+ bool is_libpfm_event;
+ bool auto_merge_stats;
+ bool collect_stat;
+ bool weak_group;
+ int bpf_fd;
+ struct bpf_object *bpf_obj;
+ };
+
+ /*
+ * metric fields are similar, but needs more care as they can have
+ * references to other metric (evsel).
+ */
+ const char * metric_expr;
+ const char * metric_name;
+ struct evsel **metric_events;
+ struct evsel *metric_leader;
+
+ void *handler;
struct perf_counts *counts;
struct perf_counts *prev_raw_counts;
- int idx;
- unsigned long max_events;
unsigned long nr_events_printed;
- char *name;
- double scale;
- const char *unit;
- struct tep_event *tp_format;
- off_t id_offset;
struct perf_stat_evsel *stats;
void *priv;
u64 db_id;
- struct cgroup *cgrp;
- void *handler;
- unsigned int sample_size;
- int id_pos;
- int is_pos;
- enum perf_tool_event tool_event;
bool uniquified_name;
- bool snapshot;
bool supported;
bool needs_swap;
bool disabled;
bool no_aux_samples;
bool immediate;
bool tracking;
- bool per_pkg;
- bool precise_max;
bool ignore_missing_thread;
bool forced_leader;
- bool use_uncore_alias;
- bool is_libpfm_event;
- /* parse modifier helper */
- int exclude_GH;
- int sample_read;
- unsigned long *per_pkg_mask;
- struct evsel *leader;
- char *group_name;
bool cmdline_group_boundary;
- struct list_head config_terms;
- struct bpf_object *bpf_obj;
- int bpf_fd;
- int err;
- bool auto_merge_stats;
bool merged_stat;
- const char * metric_expr;
- const char * metric_name;
- struct evsel **metric_events;
- struct evsel *metric_leader;
- bool collect_stat;
- bool weak_group;
bool reset_group;
bool errored;
- bool percore;
+ unsigned long *per_pkg_mask;
+ struct evsel *leader;
+ struct list_head config_terms;
+ int err;
int cpu_iter;
- const char *pmu_name;
struct {
evsel__sb_cb_t *cb;
void *data;
@@ -169,6 +183,7 @@ static inline struct evsel *evsel__new(struct perf_event_attr *attr)
return evsel__new_idx(attr, 0);
}
+struct evsel *evsel__clone(struct evsel *orig);
struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx);
/*
diff --git a/tools/perf/util/group.h b/tools/perf/util/group.h
deleted file mode 100644
index f36c7e31780a..000000000000
--- a/tools/perf/util/group.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef GROUP_H
-#define GROUP_H 1
-
-bool arch_topdown_check_group(bool *warn);
-void arch_topdown_group_warn(void);
-
-#endif
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 9cf4efdcbbbd..be850e9f8852 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -2082,8 +2082,14 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev,
dso = machine__findnew_dso(machine, filename);
if (dso != NULL) {
char sbuild_id[SBUILD_ID_SIZE];
+ struct build_id bid;
+ size_t size = BUILD_ID_SIZE;
- dso__set_build_id(dso, &bev->build_id);
+ if (bev->header.misc & PERF_RECORD_MISC_BUILD_ID_SIZE)
+ size = bev->size;
+
+ build_id__init(&bid, bev->data, size);
+ dso__set_build_id(dso, &bid);
if (dso_space != DSO_SPACE__USER) {
struct kmod_path m = { .name = NULL, };
@@ -2095,10 +2101,9 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev,
free(m.name);
}
- build_id__sprintf(dso->build_id, sizeof(dso->build_id),
- sbuild_id);
- pr_debug("build id event received for %s: %s\n",
- dso->long_name, sbuild_id);
+ build_id__sprintf(&dso->bid, sbuild_id);
+ pr_debug("build id event received for %s: %s [%zu]\n",
+ dso->long_name, sbuild_id, size);
dso__put(dso);
}
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 0af4e81c46e2..3a0348caec7d 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1101,6 +1101,8 @@ static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
if (queue->tid == -1 || pt->have_sched_switch) {
ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
+ if (ptq->tid == -1)
+ ptq->pid = -1;
thread__zput(ptq->thread);
}
@@ -2603,10 +2605,8 @@ static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
tid = sample->tid;
}
- if (tid == -1) {
- pr_err("context_switch event has no tid\n");
- return -EINVAL;
- }
+ if (tid == -1)
+ intel_pt_log("context_switch event has no tid\n");
ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
if (ret <= 0)
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index 0804308ef285..055bab7a92b3 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -374,11 +374,15 @@ static uint64_t convert_timestamp(struct jit_buf_desc *jd, uint64_t timestamp)
if (!jd->use_arch_timestamp)
return timestamp;
- tc.time_shift = jd->session->time_conv.time_shift;
- tc.time_mult = jd->session->time_conv.time_mult;
- tc.time_zero = jd->session->time_conv.time_zero;
-
- if (!tc.time_mult)
+ tc.time_shift = jd->session->time_conv.time_shift;
+ tc.time_mult = jd->session->time_conv.time_mult;
+ tc.time_zero = jd->session->time_conv.time_zero;
+ tc.time_cycles = jd->session->time_conv.time_cycles;
+ tc.time_mask = jd->session->time_conv.time_mask;
+ tc.cap_user_time_zero = jd->session->time_conv.cap_user_time_zero;
+ tc.cap_user_time_short = jd->session->time_conv.cap_user_time_short;
+
+ if (!tc.cap_user_time_zero)
return 0;
return tsc_to_perf_time(timestamp, &tc);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 85587de027a5..7d4194ffc5b0 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -3100,3 +3100,15 @@ char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, ch
*addrp = map->unmap_ip(map, sym->start);
return sym->name;
}
+
+int machine__for_each_dso(struct machine *machine, machine__dso_t fn, void *priv)
+{
+ struct dso *pos;
+ int err = 0;
+
+ list_for_each_entry(pos, &machine->dsos.head, node) {
+ if (fn(pos, machine, priv))
+ err = -1;
+ }
+ return err;
+}
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 062c36a8433c..26368d3c1754 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -250,6 +250,10 @@ void machines__destroy_kernel_maps(struct machines *machines);
size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp);
+typedef int (*machine__dso_t)(struct dso *dso, struct machine *machine, void *priv);
+
+int machine__for_each_dso(struct machine *machine, machine__dso_t fn,
+ void *priv);
int machine__for_each_thread(struct machine *machine,
int (*fn)(struct thread *thread, void *p),
void *priv);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index cc0faf8f1321..f44ede437dc7 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -27,21 +27,6 @@
static void __maps__insert(struct maps *maps, struct map *map);
-static inline int is_anon_memory(const char *filename, u32 flags)
-{
- return flags & MAP_HUGETLB ||
- !strcmp(filename, "//anon") ||
- !strncmp(filename, "/dev/zero", sizeof("/dev/zero") - 1) ||
- !strncmp(filename, "/anon_hugepage", sizeof("/anon_hugepage") - 1);
-}
-
-static inline int is_no_dso_memory(const char *filename)
-{
- return !strncmp(filename, "[stack", 6) ||
- !strncmp(filename, "/SYSV",5) ||
- !strcmp(filename, "[heap]");
-}
-
static inline int is_android_lib(const char *filename)
{
return strstarts(filename, "/data/app-lib/") ||
@@ -158,7 +143,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
int anon, no_dso, vdso, android;
android = is_android_lib(filename);
- anon = is_anon_memory(filename, flags);
+ anon = is_anon_memory(filename) || flags & MAP_HUGETLB;
vdso = is_vdso_map(filename);
no_dso = is_no_dso_memory(filename);
map->prot = prot;
@@ -346,9 +331,7 @@ int map__load(struct map *map)
if (map->dso->has_build_id) {
char sbuild_id[SBUILD_ID_SIZE];
- build_id__sprintf(map->dso->build_id,
- sizeof(map->dso->build_id),
- sbuild_id);
+ build_id__sprintf(&map->dso->bid, sbuild_id);
pr_debug("%s with build id %s not found", name, sbuild_id);
} else
pr_debug("Failed to open %s", name);
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index c2f5d28fe73a..b1c0686db1b7 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -171,4 +171,18 @@ static inline bool is_bpf_image(const char *name)
return strncmp(name, "bpf_trampoline_", sizeof("bpf_trampoline_") - 1) == 0 ||
strncmp(name, "bpf_dispatcher_", sizeof("bpf_dispatcher_") - 1) == 0;
}
+
+static inline int is_anon_memory(const char *filename)
+{
+ return !strcmp(filename, "//anon") ||
+ !strncmp(filename, "/dev/zero", sizeof("/dev/zero") - 1) ||
+ !strncmp(filename, "/anon_hugepage", sizeof("/anon_hugepage") - 1);
+}
+
+static inline int is_no_dso_memory(const char *filename)
+{
+ return !strncmp(filename, "[stack", 6) ||
+ !strncmp(filename, "/SYSV", 5) ||
+ !strcmp(filename, "[heap]");
+}
#endif /* __PERF_MAP_H */
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index ab5030fcfed4..060454a17293 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -15,7 +15,6 @@
#include "rblist.h"
#include <string.h>
#include <errno.h>
-#include "pmu-events/pmu-events.h"
#include "strlist.h"
#include <assert.h>
#include <linux/ctype.h>
@@ -25,6 +24,7 @@
#include <api/fs/fs.h>
#include "util.h"
#include <asm/bug.h>
+#include "cgroup.h"
struct metric_event *metricgroup__lookup(struct rblist *metric_events,
struct evsel *evsel,
@@ -150,8 +150,20 @@ static void expr_ids__exit(struct expr_ids *ids)
free(ids->id[i].id);
}
+static bool contains_event(struct evsel **metric_events, int num_events,
+ const char *event_name)
+{
+ int i;
+
+ for (i = 0; i < num_events; i++) {
+ if (!strcmp(metric_events[i]->name, event_name))
+ return true;
+ }
+ return false;
+}
+
/**
- * Find a group of events in perf_evlist that correpond to those from a parsed
+ * Find a group of events in perf_evlist that correspond to those from a parsed
* metric expression. Note, as find_evsel_group is called in the same order as
* perf_evlist was constructed, metric_no_merge doesn't need to test for
* underfilling a group.
@@ -180,7 +192,11 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist,
int i = 0, matched_events = 0, events_to_match;
const int idnum = (int)hashmap__size(&pctx->ids);
- /* duration_time is grouped separately. */
+ /*
+ * duration_time is always grouped separately, when events are grouped
+ * (ie has_constraint is false) then ignore it in the matching loop and
+ * add it to metric_events at the end.
+ */
if (!has_constraint &&
hashmap__find(&pctx->ids, "duration_time", (void **)&val_ptr))
events_to_match = idnum - 1;
@@ -207,23 +223,20 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist,
sizeof(struct evsel *) * idnum);
current_leader = ev->leader;
}
- if (hashmap__find(&pctx->ids, ev->name, (void **)&val_ptr)) {
- if (has_constraint) {
- /*
- * Events aren't grouped, ensure the same event
- * isn't matched from two groups.
- */
- for (i = 0; i < matched_events; i++) {
- if (!strcmp(ev->name,
- metric_events[i]->name)) {
- break;
- }
- }
- if (i != matched_events)
- continue;
- }
+ /*
+ * Check for duplicate events with the same name. For example,
+ * uncore_imc/cas_count_read/ will turn into 6 events per socket
+ * on skylakex. Only the first such event is placed in
+ * metric_events. If events aren't grouped then this also
+ * ensures that the same event in different sibling groups
+ * aren't both added to metric_events.
+ */
+ if (contains_event(metric_events, matched_events, ev->name))
+ continue;
+ /* Does this event belong to the parse context? */
+ if (hashmap__find(&pctx->ids, ev->name, (void **)&val_ptr))
metric_events[matched_events++] = ev;
- }
+
if (matched_events == events_to_match)
break;
}
@@ -239,7 +252,7 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist,
}
if (matched_events != idnum) {
- /* Not whole match */
+ /* Not a whole match */
return NULL;
}
@@ -247,8 +260,32 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist,
for (i = 0; i < idnum; i++) {
ev = metric_events[i];
- ev->metric_leader = ev;
+ /* Don't free the used events. */
set_bit(ev->idx, evlist_used);
+ /*
+ * The metric leader points to the identically named event in
+ * metric_events.
+ */
+ ev->metric_leader = ev;
+ /*
+ * Mark two events with identical names in the same group (or
+ * globally) as being in use as uncore events may be duplicated
+ * for each pmu. Set the metric leader of such events to be the
+ * event that appears in metric_events.
+ */
+ evlist__for_each_entry_continue(perf_evlist, ev) {
+ /*
+ * If events are grouped then the search can terminate
+ * when then group is left.
+ */
+ if (!has_constraint &&
+ ev->leader != metric_events[i]->leader)
+ break;
+ if (!strcmp(metric_events[i]->name, ev->name)) {
+ set_bit(ev->idx, evlist_used);
+ ev->metric_leader = metric_events[i];
+ }
+ }
}
return metric_events[0];
@@ -540,10 +577,12 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
}
}
- if (metricgroups && !raw)
- printf("\nMetric Groups:\n\n");
- else if (metrics && !raw)
- printf("\nMetrics:\n\n");
+ if (!filter || !rblist__empty(&groups)) {
+ if (metricgroups && !raw)
+ printf("\nMetric Groups:\n\n");
+ else if (metrics && !raw)
+ printf("\nMetrics:\n\n");
+ }
for (node = rb_first_cached(&groups.entries); node; node = next) {
struct mep *me = container_of(node, struct mep, nd);
@@ -639,7 +678,7 @@ static bool metricgroup__has_constraint(struct pmu_event *pe)
return false;
}
-int __weak arch_get_runtimeparam(void)
+int __weak arch_get_runtimeparam(struct pmu_event *pe __maybe_unused)
{
return 1;
}
@@ -910,7 +949,7 @@ static int add_metric(struct list_head *metric_list,
} else {
int j, count;
- count = arch_get_runtimeparam();
+ count = arch_get_runtimeparam(pe);
/* This loop is added to create multiple
* events depend on count value and add
@@ -1119,3 +1158,87 @@ bool metricgroup__has_metric(const char *metric)
}
return false;
}
+
+int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp,
+ struct rblist *new_metric_events,
+ struct rblist *old_metric_events)
+{
+ unsigned i;
+
+ for (i = 0; i < rblist__nr_entries(old_metric_events); i++) {
+ struct rb_node *nd;
+ struct metric_event *old_me, *new_me;
+ struct metric_expr *old_expr, *new_expr;
+ struct evsel *evsel;
+ size_t alloc_size;
+ int idx, nr;
+
+ nd = rblist__entry(old_metric_events, i);
+ old_me = container_of(nd, struct metric_event, nd);
+
+ evsel = evlist__find_evsel(evlist, old_me->evsel->idx);
+ if (!evsel)
+ return -EINVAL;
+ new_me = metricgroup__lookup(new_metric_events, evsel, true);
+ if (!new_me)
+ return -ENOMEM;
+
+ pr_debug("copying metric event for cgroup '%s': %s (idx=%d)\n",
+ cgrp ? cgrp->name : "root", evsel->name, evsel->idx);
+
+ list_for_each_entry(old_expr, &old_me->head, nd) {
+ new_expr = malloc(sizeof(*new_expr));
+ if (!new_expr)
+ return -ENOMEM;
+
+ new_expr->metric_expr = old_expr->metric_expr;
+ new_expr->metric_name = old_expr->metric_name;
+ new_expr->metric_unit = old_expr->metric_unit;
+ new_expr->runtime = old_expr->runtime;
+
+ if (old_expr->metric_refs) {
+ /* calculate number of metric_events */
+ for (nr = 0; old_expr->metric_refs[nr].metric_name; nr++)
+ continue;
+ alloc_size = sizeof(*new_expr->metric_refs);
+ new_expr->metric_refs = calloc(nr + 1, alloc_size);
+ if (!new_expr->metric_refs) {
+ free(new_expr);
+ return -ENOMEM;
+ }
+
+ memcpy(new_expr->metric_refs, old_expr->metric_refs,
+ nr * alloc_size);
+ } else {
+ new_expr->metric_refs = NULL;
+ }
+
+ /* calculate number of metric_events */
+ for (nr = 0; old_expr->metric_events[nr]; nr++)
+ continue;
+ alloc_size = sizeof(*new_expr->metric_events);
+ new_expr->metric_events = calloc(nr + 1, alloc_size);
+ if (!new_expr->metric_events) {
+ free(new_expr->metric_refs);
+ free(new_expr);
+ return -ENOMEM;
+ }
+
+ /* copy evsel in the same position */
+ for (idx = 0; idx < nr; idx++) {
+ evsel = old_expr->metric_events[idx];
+ evsel = evlist__find_evsel(evlist, evsel->idx);
+ if (evsel == NULL) {
+ free(new_expr->metric_events);
+ free(new_expr->metric_refs);
+ free(new_expr);
+ return -EINVAL;
+ }
+ new_expr->metric_events[idx] = evsel;
+ }
+
+ list_add(&new_expr->nd, &new_me->head);
+ }
+ }
+ return 0;
+}
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
index 62623a39cbec..ed1b9392e624 100644
--- a/tools/perf/util/metricgroup.h
+++ b/tools/perf/util/metricgroup.h
@@ -5,12 +5,15 @@
#include <linux/list.h>
#include <linux/rbtree.h>
#include <stdbool.h>
+#include "pmu-events/pmu-events.h"
+struct evlist;
struct evsel;
struct evlist;
struct option;
struct rblist;
struct pmu_events_map;
+struct cgroup;
struct metric_event {
struct rb_node nd;
@@ -52,6 +55,10 @@ int metricgroup__parse_groups_test(struct evlist *evlist,
void metricgroup__print(bool metrics, bool groups, char *filter,
bool raw, bool details);
bool metricgroup__has_metric(const char *metric);
-int arch_get_runtimeparam(void);
+int arch_get_runtimeparam(struct pmu_event *pe __maybe_unused);
void metricgroup__rblist_exit(struct rblist *metric_events);
+
+int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp,
+ struct rblist *new_metric_events,
+ struct rblist *old_metric_events);
#endif
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 667cbca1547a..3b273580fb84 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -353,18 +353,20 @@ __add_event(struct list_head *list, int *idx,
const char *cpu_list)
{
struct evsel *evsel;
- struct perf_cpu_map *cpus = pmu ? pmu->cpus :
+ struct perf_cpu_map *cpus = pmu ? perf_cpu_map__get(pmu->cpus) :
cpu_list ? perf_cpu_map__new(cpu_list) : NULL;
if (init_attr)
event_attr_init(attr);
evsel = evsel__new_idx(attr, *idx);
- if (!evsel)
+ if (!evsel) {
+ perf_cpu_map__put(cpus);
return NULL;
+ }
(*idx)++;
- evsel->core.cpus = perf_cpu_map__get(cpus);
+ evsel->core.cpus = cpus;
evsel->core.own_cpus = perf_cpu_map__get(cpus);
evsel->core.system_wide = pmu ? pmu->is_uncore : false;
evsel->auto_merge_stats = auto_merge_stats;
@@ -940,12 +942,12 @@ do { \
}
int parse_events_add_breakpoint(struct list_head *list, int *idx,
- void *ptr, char *type, u64 len)
+ u64 addr, char *type, u64 len)
{
struct perf_event_attr attr;
memset(&attr, 0, sizeof(attr));
- attr.bp_addr = (unsigned long) ptr;
+ attr.bp_addr = addr;
if (parse_breakpoint_type(type, &attr))
return -EINVAL;
@@ -1773,6 +1775,7 @@ struct event_modifier {
int sample_read;
int pinned;
int weak;
+ int exclusive;
};
static int get_event_modifier(struct event_modifier *mod, char *str,
@@ -1788,6 +1791,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
int precise_max = 0;
int sample_read = 0;
int pinned = evsel ? evsel->core.attr.pinned : 0;
+ int exclusive = evsel ? evsel->core.attr.exclusive : 0;
int exclude = eu | ek | eh;
int exclude_GH = evsel ? evsel->exclude_GH : 0;
@@ -1831,6 +1835,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
sample_read = 1;
} else if (*str == 'D') {
pinned = 1;
+ } else if (*str == 'e') {
+ exclusive = 1;
} else if (*str == 'W') {
weak = 1;
} else
@@ -1864,6 +1870,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
mod->sample_read = sample_read;
mod->pinned = pinned;
mod->weak = weak;
+ mod->exclusive = exclusive;
return 0;
}
@@ -1877,7 +1884,7 @@ static int check_modifier(char *str)
char *p = str;
/* The sizeof includes 0 byte as well. */
- if (strlen(str) > (sizeof("ukhGHpppPSDIW") - 1))
+ if (strlen(str) > (sizeof("ukhGHpppPSDIWe") - 1))
return -1;
while (*p) {
@@ -1919,8 +1926,10 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add)
evsel->precise_max = mod.precise_max;
evsel->weak_group = mod.weak;
- if (evsel__is_group_leader(evsel))
+ if (evsel__is_group_leader(evsel)) {
evsel->core.attr.pinned = mod.pinned;
+ evsel->core.attr.exclusive = mod.exclusive;
+ }
}
return 0;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 00cde7d2e30c..e80c9b74f2f2 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -190,7 +190,7 @@ int parse_events_add_cache(struct list_head *list, int *idx,
struct parse_events_error *error,
struct list_head *head_config);
int parse_events_add_breakpoint(struct list_head *list, int *idx,
- void *ptr, char *type, u64 len);
+ u64 addr, char *type, u64 len);
int parse_events_add_pmu(struct parse_events_state *parse_state,
struct list_head *list, char *name,
struct list_head *head_config,
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 3ca5fd2829ca..9db5097317f4 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -210,7 +210,7 @@ name_tag [\'][a-zA-Z_*?\[\]][a-zA-Z0-9_*?\-,\.\[\]:=]*[\']
name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]*
drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)?
/* If you add a modifier you need to update check_modifier() */
-modifier_event [ukhpPGHSDIW]+
+modifier_event [ukhpPGHSDIWe]+
modifier_bp [rwx]{1,3}
%%
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 645bf4f1859f..d5b6aff82f21 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -511,7 +511,7 @@ PE_PREFIX_MEM PE_VALUE '/' PE_VALUE ':' PE_MODIFIER_BP sep_dc
list = alloc_list();
ABORT_ON(!list);
err = parse_events_add_breakpoint(list, &parse_state->idx,
- (void *)(uintptr_t) $2, $6, $4);
+ $2, $6, $4);
free($6);
if (err) {
free(list);
@@ -528,7 +528,7 @@ PE_PREFIX_MEM PE_VALUE '/' PE_VALUE sep_dc
list = alloc_list();
ABORT_ON(!list);
if (parse_events_add_breakpoint(list, &parse_state->idx,
- (void *)(uintptr_t) $2, NULL, $4)) {
+ $2, NULL, $4)) {
free(list);
YYABORT;
}
@@ -544,7 +544,7 @@ PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc
list = alloc_list();
ABORT_ON(!list);
err = parse_events_add_breakpoint(list, &parse_state->idx,
- (void *)(uintptr_t) $2, $4, 0);
+ $2, $4, 0);
free($4);
if (err) {
free(list);
@@ -561,7 +561,7 @@ PE_PREFIX_MEM PE_VALUE sep_dc
list = alloc_list();
ABORT_ON(!list);
if (parse_events_add_breakpoint(list, &parse_state->idx,
- (void *)(uintptr_t) $2, NULL, 0)) {
+ $2, NULL, 0)) {
free(list);
YYABORT;
}
diff --git a/tools/perf/util/print_binary.c b/tools/perf/util/print_binary.c
index 599a1543871d..13fdc51c61d9 100644
--- a/tools/perf/util/print_binary.c
+++ b/tools/perf/util/print_binary.c
@@ -50,7 +50,7 @@ int is_printable_array(char *p, unsigned int len)
len--;
- for (i = 0; i < len; i++) {
+ for (i = 0; i < len && p[i]; i++) {
if (!isprint(p[i]) && !isspace(p[i]))
return 0;
}
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 99d36ac77c08..8eae2afff71a 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -43,6 +43,10 @@
#include <linux/ctype.h>
#include <linux/zalloc.h>
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+#include <elfutils/debuginfod.h>
+#endif
+
#define PERFPROBE_GROUP "probe"
bool probe_event_dry_run; /* Dry run flag */
@@ -129,9 +133,10 @@ static int kernel_get_symbol_address_by_name(const char *name, u64 *addr,
struct map *map;
/* ref_reloc_sym is just a label. Need a special fix*/
- reloc_sym = kernel_get_ref_reloc_sym(NULL);
+ reloc_sym = kernel_get_ref_reloc_sym(&map);
if (reloc_sym && strcmp(name, reloc_sym->name) == 0)
- *addr = (reloc) ? reloc_sym->addr : reloc_sym->unrelocated_addr;
+ *addr = (!map->reloc || reloc) ? reloc_sym->addr :
+ reloc_sym->unrelocated_addr;
else {
sym = machine__find_kernel_symbol_by_name(host_machine, name, &map);
if (!sym)
@@ -337,6 +342,8 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso)
map = machine__kernel_map(host_machine);
dso = map->dso;
+ if (!dso->has_build_id)
+ dso__read_running_kernel_build_id(dso, host_machine);
vmlinux_name = symbol_conf.vmlinux_name;
dso->load_errno = 0;
@@ -452,6 +459,49 @@ static int get_alternative_line_range(struct debuginfo *dinfo,
return ret;
}
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+static struct debuginfo *open_from_debuginfod(struct dso *dso, struct nsinfo *nsi,
+ bool silent)
+{
+ debuginfod_client *c = debuginfod_begin();
+ char sbuild_id[SBUILD_ID_SIZE + 1];
+ struct debuginfo *ret = NULL;
+ struct nscookie nsc;
+ char *path;
+ int fd;
+
+ if (!c)
+ return NULL;
+
+ build_id__sprintf(&dso->bid, sbuild_id);
+ fd = debuginfod_find_debuginfo(c, (const unsigned char *)sbuild_id,
+ 0, &path);
+ if (fd >= 0)
+ close(fd);
+ debuginfod_end(c);
+ if (fd < 0) {
+ if (!silent)
+ pr_debug("Failed to find debuginfo in debuginfod.\n");
+ return NULL;
+ }
+ if (!silent)
+ pr_debug("Load debuginfo from debuginfod (%s)\n", path);
+
+ nsinfo__mountns_enter(nsi, &nsc);
+ ret = debuginfo__new((const char *)path);
+ nsinfo__mountns_exit(&nsc);
+ return ret;
+}
+#else
+static inline
+struct debuginfo *open_from_debuginfod(struct dso *dso __maybe_unused,
+ struct nsinfo *nsi __maybe_unused,
+ bool silent __maybe_unused)
+{
+ return NULL;
+}
+#endif
+
/* Open new debuginfo of given module */
static struct debuginfo *open_debuginfo(const char *module, struct nsinfo *nsi,
bool silent)
@@ -471,6 +521,10 @@ static struct debuginfo *open_debuginfo(const char *module, struct nsinfo *nsi,
strcpy(reason, "(unknown)");
} else
dso__strerror_load(dso, reason, STRERR_BUFSIZE);
+ if (dso)
+ ret = open_from_debuginfod(dso, nsi, silent);
+ if (ret)
+ return ret;
if (!silent) {
if (module)
pr_err("Module %s is not loaded, please specify its full path name.\n", module);
@@ -795,7 +849,8 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
free(tevs[i].point.symbol);
tevs[i].point.symbol = tmp;
tevs[i].point.offset = tevs[i].point.address -
- reloc_sym->unrelocated_addr;
+ (map->reloc ? reloc_sym->unrelocated_addr :
+ reloc_sym->addr);
}
return skipped;
}
@@ -950,6 +1005,7 @@ static int _show_one_line(FILE *fp, int l, bool skip, bool show_num)
static int __show_line_range(struct line_range *lr, const char *module,
bool user)
{
+ struct build_id bid;
int l = 1;
struct int_node *ln;
struct debuginfo *dinfo;
@@ -957,6 +1013,7 @@ static int __show_line_range(struct line_range *lr, const char *module,
int ret;
char *tmp;
char sbuf[STRERR_BUFSIZE];
+ char sbuild_id[SBUILD_ID_SIZE] = "";
/* Search a line range */
dinfo = open_debuginfo(module, NULL, false);
@@ -969,6 +1026,10 @@ static int __show_line_range(struct line_range *lr, const char *module,
if (!ret)
ret = debuginfo__find_line_range(dinfo, lr);
}
+ if (dinfo->build_id) {
+ build_id__init(&bid, dinfo->build_id, BUILD_ID_SIZE);
+ build_id__sprintf(&bid, sbuild_id);
+ }
debuginfo__delete(dinfo);
if (ret == 0 || ret == -ENOENT) {
pr_warning("Specified source line is not found.\n");
@@ -980,7 +1041,7 @@ static int __show_line_range(struct line_range *lr, const char *module,
/* Convert source file path */
tmp = lr->path;
- ret = get_real_path(tmp, lr->comp_dir, &lr->path);
+ ret = find_source_path(tmp, sbuild_id, lr->comp_dir, &lr->path);
/* Free old path when new path is assigned */
if (tmp != lr->path)
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 659024342e9a..2c4061035f77 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -31,6 +31,10 @@
#include "probe-file.h"
#include "string2.h"
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+#include <elfutils/debuginfod.h>
+#endif
+
/* Kprobe tracer basic type is up to u64 */
#define MAX_BASIC_TYPE_BITS 64
@@ -51,6 +55,7 @@ static const Dwfl_Callbacks offline_callbacks = {
static int debuginfo__init_offline_dwarf(struct debuginfo *dbg,
const char *path)
{
+ GElf_Addr dummy;
int fd;
fd = open(path, O_RDONLY);
@@ -70,6 +75,8 @@ static int debuginfo__init_offline_dwarf(struct debuginfo *dbg,
if (!dbg->dbg)
goto error;
+ dwfl_module_build_id(dbg->mod, &dbg->build_id, &dummy);
+
dwfl_report_end(dbg->dwfl, NULL, NULL);
return 0;
@@ -942,6 +949,8 @@ static int probe_point_lazy_walker(const char *fname, int lineno,
/* Find probe points from lazy pattern */
static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
{
+ struct build_id bid;
+ char sbuild_id[SBUILD_ID_SIZE] = "";
int ret = 0;
char *fpath;
@@ -949,7 +958,11 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
const char *comp_dir;
comp_dir = cu_get_comp_dir(&pf->cu_die);
- ret = get_real_path(pf->fname, comp_dir, &fpath);
+ if (pf->dbg->build_id) {
+ build_id__init(&bid, pf->dbg->build_id, BUILD_ID_SIZE);
+ build_id__sprintf(&bid, sbuild_id);
+ }
+ ret = find_source_path(pf->fname, sbuild_id, comp_dir, &fpath);
if (ret < 0) {
pr_warning("Failed to find source file path.\n");
return ret;
@@ -1448,7 +1461,7 @@ int debuginfo__find_trace_events(struct debuginfo *dbg,
struct probe_trace_event **tevs)
{
struct trace_event_finder tf = {
- .pf = {.pev = pev, .callback = add_probe_trace_event},
+ .pf = {.pev = pev, .dbg = dbg, .callback = add_probe_trace_event},
.max_tevs = probe_conf.max_probes, .mod = dbg->mod};
int ret, i;
@@ -1618,7 +1631,7 @@ int debuginfo__find_available_vars_at(struct debuginfo *dbg,
struct variable_list **vls)
{
struct available_var_finder af = {
- .pf = {.pev = pev, .callback = add_available_vars},
+ .pf = {.pev = pev, .dbg = dbg, .callback = add_available_vars},
.mod = dbg->mod,
.max_vls = probe_conf.max_probes};
int ret;
@@ -1973,17 +1986,57 @@ found:
return (ret < 0) ? ret : lf.found;
}
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+/* debuginfod doesn't require the comp_dir but buildid is required */
+static int get_source_from_debuginfod(const char *raw_path,
+ const char *sbuild_id, char **new_path)
+{
+ debuginfod_client *c = debuginfod_begin();
+ const char *p = raw_path;
+ int fd;
+
+ if (!c)
+ return -ENOMEM;
+
+ fd = debuginfod_find_source(c, (const unsigned char *)sbuild_id,
+ 0, p, new_path);
+ pr_debug("Search %s from debuginfod -> %d\n", p, fd);
+ if (fd >= 0)
+ close(fd);
+ debuginfod_end(c);
+ if (fd < 0) {
+ pr_debug("Failed to find %s in debuginfod (%s)\n",
+ raw_path, sbuild_id);
+ return -ENOENT;
+ }
+ pr_debug("Got a source %s\n", *new_path);
+
+ return 0;
+}
+#else
+static inline int get_source_from_debuginfod(const char *raw_path __maybe_unused,
+ const char *sbuild_id __maybe_unused,
+ char **new_path __maybe_unused)
+{
+ return -ENOTSUP;
+}
+#endif
/*
* Find a src file from a DWARF tag path. Prepend optional source path prefix
* and chop off leading directories that do not exist. Result is passed back as
* a newly allocated path on success.
* Return 0 if file was found and readable, -errno otherwise.
*/
-int get_real_path(const char *raw_path, const char *comp_dir,
- char **new_path)
+int find_source_path(const char *raw_path, const char *sbuild_id,
+ const char *comp_dir, char **new_path)
{
const char *prefix = symbol_conf.source_prefix;
+ if (sbuild_id && !prefix) {
+ if (!get_source_from_debuginfod(raw_path, sbuild_id, new_path))
+ return 0;
+ }
+
if (!prefix) {
if (raw_path[0] != '/' && comp_dir)
/* If not an absolute path, try to use comp_dir */
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index 11be10080613..2febb5875678 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -4,6 +4,7 @@
#include <stdbool.h>
#include "intlist.h"
+#include "build-id.h"
#include "probe-event.h"
#include <linux/ctype.h>
@@ -32,6 +33,7 @@ struct debuginfo {
Dwfl_Module *mod;
Dwfl *dwfl;
Dwarf_Addr bias;
+ const unsigned char *build_id;
};
/* This also tries to open distro debuginfo */
@@ -59,11 +61,12 @@ int debuginfo__find_available_vars_at(struct debuginfo *dbg,
struct variable_list **vls);
/* Find a src file from a DWARF tag path */
-int get_real_path(const char *raw_path, const char *comp_dir,
- char **new_path);
+int find_source_path(const char *raw_path, const char *sbuild_id,
+ const char *comp_dir, char **new_path);
struct probe_finder {
struct perf_probe_event *pev; /* Target probe event */
+ struct debuginfo *dbg;
/* Callback when a probe point is found */
int (*callback)(Dwarf_Die *sc_die, struct probe_finder *pf);
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 75a9b1d62bba..ae8edde7c50e 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -15,9 +15,11 @@
#include "thread_map.h"
#include "trace-event.h"
#include "mmap.h"
+#include "stat.h"
+#include "metricgroup.h"
#include "util/env.h"
#include <internal/lib.h>
-#include "../perf-sys.h"
+#include "util.h"
#if PY_MAJOR_VERSION < 3
#define _PyUnicode_FromString(arg) \
@@ -61,6 +63,23 @@ int parse_callchain_record(const char *arg __maybe_unused,
struct perf_env perf_env;
/*
+ * Add this one here not to drag util/stat-shadow.c
+ */
+void perf_stat__collect_metric_expr(struct evlist *evsel_list)
+{
+}
+
+/*
+ * Add this one here not to drag util/metricgroup.c
+ */
+int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp,
+ struct rblist *new_metric_events,
+ struct rblist *old_metric_events)
+{
+ return 0;
+}
+
+/*
* Support debug printing even though util/debug.c is not linked. That means
* implementing 'verbose' and 'eprintf'.
*/
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index ea9aa1d7cf50..07e4b96a6625 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -14,6 +14,7 @@
#include "util/perf_api_probe.h"
#include "record.h"
#include "../perf-sys.h"
+#include "topdown.h"
/*
* evsel__config_leader_sampling() uses special rules for leader sampling.
@@ -24,7 +25,7 @@ static struct evsel *evsel__read_sampler(struct evsel *evsel, struct evlist *evl
{
struct evsel *leader = evsel->leader;
- if (evsel__is_aux_event(leader)) {
+ if (evsel__is_aux_event(leader) || arch_topdown_sample_read(leader)) {
evlist__for_each_entry(evlist, evsel) {
if (evsel->leader == leader && evsel != evsel->leader)
return evsel;
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index 03678ff25539..266760ac9143 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -73,6 +73,7 @@ struct record_opts {
unsigned int nr_threads_synthesize;
int ctl_fd;
int ctl_fd_ack;
+ bool ctl_fd_close;
};
extern const char * const *record_usage;
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 739516fdf6e3..7cbd024e3e63 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -1064,7 +1064,7 @@ static int python_export_dso(struct db_export *dbe, struct dso *dso,
char sbuild_id[SBUILD_ID_SIZE];
PyObject *t;
- build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+ build_id__sprintf(&dso->bid, sbuild_id);
t = tuple_new(5);
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 493ec372fdec..4b57c0c07632 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -946,7 +946,6 @@ static void print_metric_headers(struct perf_stat_config *config,
out.print_metric = print_metric_header;
out.new_line = new_line_metric;
out.force_header = true;
- os.evsel = counter;
perf_stat__print_shadow_stats(config, counter, 0,
0,
&out,
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 924b54d15d54..901265127e36 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -241,6 +241,18 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
ctx, cpu, count);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING))
+ update_runtime_stat(st, STAT_TOPDOWN_RETIRING,
+ ctx, cpu, count);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC))
+ update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC,
+ ctx, cpu, count);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND))
+ update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND,
+ ctx, cpu, count);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND))
+ update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND,
+ ctx, cpu, count);
else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
ctx, cpu, count);
@@ -705,6 +717,47 @@ static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
return sanitize_val(1.0 - sum);
}
+/*
+ * Kernel reports metrics multiplied with slots. To get back
+ * the ratios we need to recreate the sum.
+ */
+
+static double td_metric_ratio(int ctx, int cpu,
+ enum stat_type type,
+ struct runtime_stat *stat)
+{
+ double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) +
+ runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) +
+ runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) +
+ runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu);
+ double d = runtime_stat_avg(stat, type, ctx, cpu);
+
+ if (sum)
+ return d / sum;
+ return 0;
+}
+
+/*
+ * ... but only if most of the values are actually available.
+ * We allow two missing.
+ */
+
+static bool full_td(int ctx, int cpu,
+ struct runtime_stat *stat)
+{
+ int c = 0;
+
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) > 0)
+ c++;
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) > 0)
+ c++;
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) > 0)
+ c++;
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu) > 0)
+ c++;
+ return c >= 2;
+}
+
static void print_smi_cost(struct perf_stat_config *config,
int cpu, struct evsel *evsel,
struct perf_stat_output_ctx *out,
@@ -1073,6 +1126,42 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
be_bound * 100.);
else
print_metric(config, ctxp, NULL, NULL, name, 0);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) &&
+ full_td(ctx, cpu, st)) {
+ double retiring = td_metric_ratio(ctx, cpu,
+ STAT_TOPDOWN_RETIRING, st);
+
+ if (retiring > 0.7)
+ color = PERF_COLOR_GREEN;
+ print_metric(config, ctxp, color, "%8.1f%%", "retiring",
+ retiring * 100.);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) &&
+ full_td(ctx, cpu, st)) {
+ double fe_bound = td_metric_ratio(ctx, cpu,
+ STAT_TOPDOWN_FE_BOUND, st);
+
+ if (fe_bound > 0.2)
+ color = PERF_COLOR_RED;
+ print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
+ fe_bound * 100.);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) &&
+ full_td(ctx, cpu, st)) {
+ double be_bound = td_metric_ratio(ctx, cpu,
+ STAT_TOPDOWN_BE_BOUND, st);
+
+ if (be_bound > 0.2)
+ color = PERF_COLOR_RED;
+ print_metric(config, ctxp, color, "%8.1f%%", "backend bound",
+ be_bound * 100.);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) &&
+ full_td(ctx, cpu, st)) {
+ double bad_spec = td_metric_ratio(ctx, cpu,
+ STAT_TOPDOWN_BAD_SPEC, st);
+
+ if (bad_spec > 0.1)
+ color = PERF_COLOR_RED;
+ print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
+ bad_spec * 100.);
} else if (evsel->metric_expr) {
generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index cdb154381a87..bd0decd6d753 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -95,6 +95,10 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
+ ID(TOPDOWN_RETIRING, topdown-retiring),
+ ID(TOPDOWN_BAD_SPEC, topdown-bad-spec),
+ ID(TOPDOWN_FE_BOUND, topdown-fe-bound),
+ ID(TOPDOWN_BE_BOUND, topdown-be-bound),
ID(SMI_NUM, msr/smi/),
ID(APERF, msr/aperf/),
};
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index aa3bed48511b..487010c624be 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -28,6 +28,10 @@ enum perf_stat_evsel_id {
PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
+ PERF_STAT_EVSEL_ID__TOPDOWN_RETIRING,
+ PERF_STAT_EVSEL_ID__TOPDOWN_BAD_SPEC,
+ PERF_STAT_EVSEL_ID__TOPDOWN_FE_BOUND,
+ PERF_STAT_EVSEL_ID__TOPDOWN_BE_BOUND,
PERF_STAT_EVSEL_ID__SMI_NUM,
PERF_STAT_EVSEL_ID__APERF,
PERF_STAT_EVSEL_ID__MAX,
@@ -82,6 +86,10 @@ enum stat_type {
STAT_TOPDOWN_SLOTS_RETIRED,
STAT_TOPDOWN_FETCH_BUBBLES,
STAT_TOPDOWN_RECOVERY_BUBBLES,
+ STAT_TOPDOWN_RETIRING,
+ STAT_TOPDOWN_BAD_SPEC,
+ STAT_TOPDOWN_FE_BOUND,
+ STAT_TOPDOWN_BE_BOUND,
STAT_SMI_NUM,
STAT_APERF,
STAT_MAX
@@ -136,6 +144,8 @@ struct perf_stat_config {
struct rblist metric_events;
int ctl_fd;
int ctl_fd_ack;
+ bool ctl_fd_close;
+ const char *cgroup_list;
};
void perf_stat__set_big_num(int set);
diff --git a/tools/perf/util/stream.c b/tools/perf/util/stream.c
new file mode 100644
index 000000000000..4bd5e5a00aa5
--- /dev/null
+++ b/tools/perf/util/stream.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Compare and figure out the top N hottest streams
+ * Copyright (c) 2020, Intel Corporation.
+ * Author: Jin Yao
+ */
+
+#include <inttypes.h>
+#include <stdlib.h>
+#include <linux/zalloc.h>
+#include "debug.h"
+#include "hist.h"
+#include "sort.h"
+#include "stream.h"
+#include "evlist.h"
+
+static void evsel_streams__delete(struct evsel_streams *es, int nr_evsel)
+{
+ for (int i = 0; i < nr_evsel; i++)
+ zfree(&es[i].streams);
+
+ free(es);
+}
+
+void evlist_streams__delete(struct evlist_streams *els)
+{
+ evsel_streams__delete(els->ev_streams, els->nr_evsel);
+ free(els);
+}
+
+static struct evlist_streams *evlist_streams__new(int nr_evsel,
+ int nr_streams_max)
+{
+ struct evlist_streams *els;
+ struct evsel_streams *es;
+
+ els = zalloc(sizeof(*els));
+ if (!els)
+ return NULL;
+
+ es = calloc(nr_evsel, sizeof(struct evsel_streams));
+ if (!es) {
+ free(els);
+ return NULL;
+ }
+
+ for (int i = 0; i < nr_evsel; i++) {
+ struct evsel_streams *s = &es[i];
+
+ s->streams = calloc(nr_streams_max, sizeof(struct stream));
+ if (!s->streams)
+ goto err;
+
+ s->nr_streams_max = nr_streams_max;
+ s->evsel_idx = -1;
+ }
+
+ els->ev_streams = es;
+ els->nr_evsel = nr_evsel;
+ return els;
+
+err:
+ evsel_streams__delete(es, nr_evsel);
+ return NULL;
+}
+
+/*
+ * The cnodes with high hit number are hot callchains.
+ */
+static void evsel_streams__set_hot_cnode(struct evsel_streams *es,
+ struct callchain_node *cnode)
+{
+ int i, idx = 0;
+ u64 hit;
+
+ if (es->nr_streams < es->nr_streams_max) {
+ i = es->nr_streams;
+ es->streams[i].cnode = cnode;
+ es->nr_streams++;
+ return;
+ }
+
+ /*
+ * Considering a few number of hot streams, only use simple
+ * way to find the cnode with smallest hit number and replace.
+ */
+ hit = (es->streams[0].cnode)->hit;
+ for (i = 1; i < es->nr_streams; i++) {
+ if ((es->streams[i].cnode)->hit < hit) {
+ hit = (es->streams[i].cnode)->hit;
+ idx = i;
+ }
+ }
+
+ if (cnode->hit > hit)
+ es->streams[idx].cnode = cnode;
+}
+
+static void update_hot_callchain(struct hist_entry *he,
+ struct evsel_streams *es)
+{
+ struct rb_root *root = &he->sorted_chain;
+ struct rb_node *rb_node = rb_first(root);
+ struct callchain_node *cnode;
+
+ while (rb_node) {
+ cnode = rb_entry(rb_node, struct callchain_node, rb_node);
+ evsel_streams__set_hot_cnode(es, cnode);
+ rb_node = rb_next(rb_node);
+ }
+}
+
+static void init_hot_callchain(struct hists *hists, struct evsel_streams *es)
+{
+ struct rb_node *next = rb_first_cached(&hists->entries);
+
+ while (next) {
+ struct hist_entry *he;
+
+ he = rb_entry(next, struct hist_entry, rb_node);
+ update_hot_callchain(he, es);
+ next = rb_next(&he->rb_node);
+ }
+
+ es->streams_hits = callchain_total_hits(hists);
+}
+
+static int evlist__init_callchain_streams(struct evlist *evlist,
+ struct evlist_streams *els)
+{
+ struct evsel_streams *es = els->ev_streams;
+ struct evsel *pos;
+ int i = 0;
+
+ BUG_ON(els->nr_evsel < evlist->core.nr_entries);
+
+ evlist__for_each_entry(evlist, pos) {
+ struct hists *hists = evsel__hists(pos);
+
+ hists__output_resort(hists, NULL);
+ init_hot_callchain(hists, &es[i]);
+ es[i].evsel_idx = pos->idx;
+ i++;
+ }
+
+ return 0;
+}
+
+struct evlist_streams *evlist__create_streams(struct evlist *evlist,
+ int nr_streams_max)
+{
+ int nr_evsel = evlist->core.nr_entries, ret = -1;
+ struct evlist_streams *els = evlist_streams__new(nr_evsel,
+ nr_streams_max);
+
+ if (!els)
+ return NULL;
+
+ ret = evlist__init_callchain_streams(evlist, els);
+ if (ret) {
+ evlist_streams__delete(els);
+ return NULL;
+ }
+
+ return els;
+}
+
+struct evsel_streams *evsel_streams__entry(struct evlist_streams *els,
+ int evsel_idx)
+{
+ struct evsel_streams *es = els->ev_streams;
+
+ for (int i = 0; i < els->nr_evsel; i++) {
+ if (es[i].evsel_idx == evsel_idx)
+ return &es[i];
+ }
+
+ return NULL;
+}
+
+static struct stream *stream__callchain_match(struct stream *base_stream,
+ struct evsel_streams *es_pair)
+{
+ for (int i = 0; i < es_pair->nr_streams; i++) {
+ struct stream *pair_stream = &es_pair->streams[i];
+
+ if (callchain_cnode_matched(base_stream->cnode,
+ pair_stream->cnode)) {
+ return pair_stream;
+ }
+ }
+
+ return NULL;
+}
+
+static struct stream *stream__match(struct stream *base_stream,
+ struct evsel_streams *es_pair)
+{
+ return stream__callchain_match(base_stream, es_pair);
+}
+
+static void stream__link(struct stream *base_stream, struct stream *pair_stream)
+{
+ base_stream->pair_cnode = pair_stream->cnode;
+ pair_stream->pair_cnode = base_stream->cnode;
+}
+
+void evsel_streams__match(struct evsel_streams *es_base,
+ struct evsel_streams *es_pair)
+{
+ for (int i = 0; i < es_base->nr_streams; i++) {
+ struct stream *base_stream = &es_base->streams[i];
+ struct stream *pair_stream;
+
+ pair_stream = stream__match(base_stream, es_pair);
+ if (pair_stream)
+ stream__link(base_stream, pair_stream);
+ }
+}
+
+static void print_callchain_pair(struct stream *base_stream, int idx,
+ struct evsel_streams *es_base,
+ struct evsel_streams *es_pair)
+{
+ struct callchain_node *base_cnode = base_stream->cnode;
+ struct callchain_node *pair_cnode = base_stream->pair_cnode;
+ struct callchain_list *base_chain, *pair_chain;
+ char buf1[512], buf2[512], cbuf1[256], cbuf2[256];
+ char *s1, *s2;
+ double pct;
+
+ printf("\nhot chain pair %d:\n", idx);
+
+ pct = (double)base_cnode->hit / (double)es_base->streams_hits;
+ scnprintf(buf1, sizeof(buf1), "cycles: %ld, hits: %.2f%%",
+ callchain_avg_cycles(base_cnode), pct * 100.0);
+
+ pct = (double)pair_cnode->hit / (double)es_pair->streams_hits;
+ scnprintf(buf2, sizeof(buf2), "cycles: %ld, hits: %.2f%%",
+ callchain_avg_cycles(pair_cnode), pct * 100.0);
+
+ printf("%35s\t%35s\n", buf1, buf2);
+
+ printf("%35s\t%35s\n",
+ "---------------------------",
+ "--------------------------");
+
+ pair_chain = list_first_entry(&pair_cnode->val,
+ struct callchain_list,
+ list);
+
+ list_for_each_entry(base_chain, &base_cnode->val, list) {
+ if (&pair_chain->list == &pair_cnode->val)
+ return;
+
+ s1 = callchain_list__sym_name(base_chain, cbuf1, sizeof(cbuf1),
+ false);
+ s2 = callchain_list__sym_name(pair_chain, cbuf2, sizeof(cbuf2),
+ false);
+
+ scnprintf(buf1, sizeof(buf1), "%35s\t%35s", s1, s2);
+ printf("%s\n", buf1);
+ pair_chain = list_next_entry(pair_chain, list);
+ }
+}
+
+static void print_stream_callchain(struct stream *stream, int idx,
+ struct evsel_streams *es, bool pair)
+{
+ struct callchain_node *cnode = stream->cnode;
+ struct callchain_list *chain;
+ char buf[512], cbuf[256], *s;
+ double pct;
+
+ printf("\nhot chain %d:\n", idx);
+
+ pct = (double)cnode->hit / (double)es->streams_hits;
+ scnprintf(buf, sizeof(buf), "cycles: %ld, hits: %.2f%%",
+ callchain_avg_cycles(cnode), pct * 100.0);
+
+ if (pair) {
+ printf("%35s\t%35s\n", "", buf);
+ printf("%35s\t%35s\n",
+ "", "--------------------------");
+ } else {
+ printf("%35s\n", buf);
+ printf("%35s\n", "--------------------------");
+ }
+
+ list_for_each_entry(chain, &cnode->val, list) {
+ s = callchain_list__sym_name(chain, cbuf, sizeof(cbuf), false);
+
+ if (pair)
+ scnprintf(buf, sizeof(buf), "%35s\t%35s", "", s);
+ else
+ scnprintf(buf, sizeof(buf), "%35s", s);
+
+ printf("%s\n", buf);
+ }
+}
+
+static void callchain_streams_report(struct evsel_streams *es_base,
+ struct evsel_streams *es_pair)
+{
+ struct stream *base_stream;
+ int i, idx = 0;
+
+ printf("[ Matched hot streams ]\n");
+ for (i = 0; i < es_base->nr_streams; i++) {
+ base_stream = &es_base->streams[i];
+ if (base_stream->pair_cnode) {
+ print_callchain_pair(base_stream, ++idx,
+ es_base, es_pair);
+ }
+ }
+
+ idx = 0;
+ printf("\n[ Hot streams in old perf data only ]\n");
+ for (i = 0; i < es_base->nr_streams; i++) {
+ base_stream = &es_base->streams[i];
+ if (!base_stream->pair_cnode) {
+ print_stream_callchain(base_stream, ++idx,
+ es_base, false);
+ }
+ }
+
+ idx = 0;
+ printf("\n[ Hot streams in new perf data only ]\n");
+ for (i = 0; i < es_pair->nr_streams; i++) {
+ base_stream = &es_pair->streams[i];
+ if (!base_stream->pair_cnode) {
+ print_stream_callchain(base_stream, ++idx,
+ es_pair, true);
+ }
+ }
+}
+
+void evsel_streams__report(struct evsel_streams *es_base,
+ struct evsel_streams *es_pair)
+{
+ return callchain_streams_report(es_base, es_pair);
+}
diff --git a/tools/perf/util/stream.h b/tools/perf/util/stream.h
new file mode 100644
index 000000000000..bee768874fea
--- /dev/null
+++ b/tools/perf/util/stream.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_STREAM_H
+#define __PERF_STREAM_H
+
+#include "callchain.h"
+
+struct stream {
+ struct callchain_node *cnode;
+ struct callchain_node *pair_cnode;
+};
+
+struct evsel_streams {
+ struct stream *streams;
+ int nr_streams_max;
+ int nr_streams;
+ int evsel_idx;
+ u64 streams_hits;
+};
+
+struct evlist_streams {
+ struct evsel_streams *ev_streams;
+ int nr_evsel;
+};
+
+struct evlist;
+
+void evlist_streams__delete(struct evlist_streams *els);
+
+struct evlist_streams *evlist__create_streams(struct evlist *evlist,
+ int nr_streams_max);
+
+struct evsel_streams *evsel_streams__entry(struct evlist_streams *els,
+ int evsel_idx);
+
+void evsel_streams__match(struct evsel_streams *es_base,
+ struct evsel_streams *es_pair);
+
+void evsel_streams__report(struct evsel_streams *es_base,
+ struct evsel_streams *es_pair);
+
+#endif /* __PERF_STREAM_H */
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 8cc4b0059fb0..44dd86a4f25f 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -50,6 +50,10 @@ typedef Elf64_Nhdr GElf_Nhdr;
#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
#endif
+#ifdef HAVE_LIBBFD_SUPPORT
+#define PACKAGE 'perf'
+#include <bfd.h>
+#else
#ifdef HAVE_CPLUS_DEMANGLE_SUPPORT
extern char *cplus_demangle(const char *, int);
@@ -65,9 +69,7 @@ static inline char *bfd_demangle(void __maybe_unused *v,
{
return NULL;
}
-#else
-#define PACKAGE 'perf'
-#include <bfd.h>
+#endif
#endif
#endif
@@ -530,8 +532,40 @@ out:
return err;
}
-int filename__read_build_id(const char *filename, void *bf, size_t size)
+#ifdef HAVE_LIBBFD_BUILDID_SUPPORT
+
+int filename__read_build_id(const char *filename, struct build_id *bid)
+{
+ size_t size = sizeof(bid->data);
+ int err = -1;
+ bfd *abfd;
+
+ abfd = bfd_openr(filename, NULL);
+ if (!abfd)
+ return -1;
+
+ if (!bfd_check_format(abfd, bfd_object)) {
+ pr_debug2("%s: cannot read %s bfd file.\n", __func__, filename);
+ goto out_close;
+ }
+
+ if (!abfd->build_id || abfd->build_id->size > size)
+ goto out_close;
+
+ memcpy(bid->data, abfd->build_id->data, abfd->build_id->size);
+ memset(bid->data + abfd->build_id->size, 0, size - abfd->build_id->size);
+ err = bid->size = abfd->build_id->size;
+
+out_close:
+ bfd_close(abfd);
+ return err;
+}
+
+#else // HAVE_LIBBFD_BUILDID_SUPPORT
+
+int filename__read_build_id(const char *filename, struct build_id *bid)
{
+ size_t size = sizeof(bid->data);
int fd, err = -1;
Elf *elf;
@@ -548,7 +582,9 @@ int filename__read_build_id(const char *filename, void *bf, size_t size)
goto out_close;
}
- err = elf_read_build_id(elf, bf, size);
+ err = elf_read_build_id(elf, bid->data, size);
+ if (err > 0)
+ bid->size = err;
elf_end(elf);
out_close:
@@ -557,13 +593,13 @@ out:
return err;
}
-int sysfs__read_build_id(const char *filename, void *build_id, size_t size)
+#endif // HAVE_LIBBFD_BUILDID_SUPPORT
+
+int sysfs__read_build_id(const char *filename, struct build_id *bid)
{
+ size_t size = sizeof(bid->data);
int fd, err = -1;
- if (size < BUILD_ID_SIZE)
- goto out;
-
fd = open(filename, O_RDONLY);
if (fd < 0)
goto out;
@@ -584,8 +620,9 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size)
break;
if (memcmp(bf, "GNU", sizeof("GNU")) == 0) {
size_t sz = min(descsz, size);
- if (read(fd, build_id, sz) == (ssize_t)sz) {
- memset(build_id + sz, 0, size - sz);
+ if (read(fd, bid->data, sz) == (ssize_t)sz) {
+ memset(bid->data + sz, 0, size - sz);
+ bid->size = sz;
err = 0;
break;
}
@@ -608,6 +645,44 @@ out:
return err;
}
+#ifdef HAVE_LIBBFD_SUPPORT
+
+int filename__read_debuglink(const char *filename, char *debuglink,
+ size_t size)
+{
+ int err = -1;
+ asection *section;
+ bfd *abfd;
+
+ abfd = bfd_openr(filename, NULL);
+ if (!abfd)
+ return -1;
+
+ if (!bfd_check_format(abfd, bfd_object)) {
+ pr_debug2("%s: cannot read %s bfd file.\n", __func__, filename);
+ goto out_close;
+ }
+
+ section = bfd_get_section_by_name(abfd, ".gnu_debuglink");
+ if (!section)
+ goto out_close;
+
+ if (section->size > size)
+ goto out_close;
+
+ if (!bfd_get_section_contents(abfd, section, debuglink, 0,
+ section->size))
+ goto out_close;
+
+ err = 0;
+
+out_close:
+ bfd_close(abfd);
+ return err;
+}
+
+#else
+
int filename__read_debuglink(const char *filename, char *debuglink,
size_t size)
{
@@ -660,6 +735,8 @@ out:
return err;
}
+#endif
+
static int dso__swap_init(struct dso *dso, unsigned char eidata)
{
static unsigned int const endian = 1;
@@ -757,13 +834,17 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
/* Always reject images with a mismatched build-id: */
if (dso->has_build_id && !symbol_conf.ignore_vmlinux_buildid) {
u8 build_id[BUILD_ID_SIZE];
+ struct build_id bid;
+ int size;
- if (elf_read_build_id(elf, build_id, BUILD_ID_SIZE) < 0) {
+ size = elf_read_build_id(elf, build_id, BUILD_ID_SIZE);
+ if (size <= 0) {
dso->load_errno = DSO_LOAD_ERRNO__CANNOT_READ_BUILDID;
goto out_elf_end;
}
- if (!dso__build_id_equal(dso, build_id)) {
+ build_id__init(&bid, build_id, size);
+ if (!dso__build_id_equal(dso, &bid)) {
pr_debug("%s: build id mismatch for %s.\n", __func__, name);
dso->load_errno = DSO_LOAD_ERRNO__MISMATCHING_BUILDID;
goto out_elf_end;
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
index d6e99af263ec..f9eb0bee7f15 100644
--- a/tools/perf/util/symbol-minimal.c
+++ b/tools/perf/util/symbol-minimal.c
@@ -31,9 +31,10 @@ static bool check_need_swap(int file_endian)
#define NT_GNU_BUILD_ID 3
-static int read_build_id(void *note_data, size_t note_len, void *bf,
- size_t size, bool need_swap)
+static int read_build_id(void *note_data, size_t note_len, struct build_id *bid,
+ bool need_swap)
{
+ size_t size = sizeof(bid->data);
struct {
u32 n_namesz;
u32 n_descsz;
@@ -63,8 +64,9 @@ static int read_build_id(void *note_data, size_t note_len, void *bf,
nhdr->n_namesz == sizeof("GNU")) {
if (memcmp(name, "GNU", sizeof("GNU")) == 0) {
size_t sz = min(size, descsz);
- memcpy(bf, ptr, sz);
- memset(bf + sz, 0, size - sz);
+ memcpy(bid->data, ptr, sz);
+ memset(bid->data + sz, 0, size - sz);
+ bid->size = sz;
return 0;
}
}
@@ -84,7 +86,7 @@ int filename__read_debuglink(const char *filename __maybe_unused,
/*
* Just try PT_NOTE header otherwise fails
*/
-int filename__read_build_id(const char *filename, void *bf, size_t size)
+int filename__read_build_id(const char *filename, struct build_id *bid)
{
FILE *fp;
int ret = -1;
@@ -156,9 +158,9 @@ int filename__read_build_id(const char *filename, void *bf, size_t size)
if (fread(buf, buf_size, 1, fp) != 1)
goto out_free;
- ret = read_build_id(buf, buf_size, bf, size, need_swap);
+ ret = read_build_id(buf, buf_size, bid, need_swap);
if (ret == 0)
- ret = size;
+ ret = bid->size;
break;
}
} else {
@@ -207,9 +209,9 @@ int filename__read_build_id(const char *filename, void *bf, size_t size)
if (fread(buf, buf_size, 1, fp) != 1)
goto out_free;
- ret = read_build_id(buf, buf_size, bf, size, need_swap);
+ ret = read_build_id(buf, buf_size, bid, need_swap);
if (ret == 0)
- ret = size;
+ ret = bid->size;
break;
}
}
@@ -220,7 +222,7 @@ out:
return ret;
}
-int sysfs__read_build_id(const char *filename, void *build_id, size_t size)
+int sysfs__read_build_id(const char *filename, struct build_id *bid)
{
int fd;
int ret = -1;
@@ -243,7 +245,7 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size)
if (read(fd, buf, buf_size) != (ssize_t) buf_size)
goto out_free;
- ret = read_build_id(buf, buf_size, build_id, size, false);
+ ret = read_build_id(buf, buf_size, bid, false);
out_free:
free(buf);
out:
@@ -339,16 +341,15 @@ int dso__load_sym(struct dso *dso, struct map *map __maybe_unused,
struct symsrc *runtime_ss __maybe_unused,
int kmodule __maybe_unused)
{
- unsigned char build_id[BUILD_ID_SIZE];
+ struct build_id bid;
int ret;
ret = fd__is_64_bit(ss->fd);
if (ret >= 0)
dso->is_64_bit = ret;
- if (filename__read_build_id(ss->name, build_id, BUILD_ID_SIZE) > 0) {
- dso__set_build_id(dso, build_id);
- }
+ if (filename__read_build_id(ss->name, &bid) > 0)
+ dso__set_build_id(dso, &bid);
return 0;
}
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 5151a8c0b791..6138866665df 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1526,6 +1526,138 @@ out_failure:
return -1;
}
+#ifdef HAVE_LIBBFD_SUPPORT
+#define PACKAGE 'perf'
+#include <bfd.h>
+
+static int bfd_symbols__cmpvalue(const void *a, const void *b)
+{
+ const asymbol *as = *(const asymbol **)a, *bs = *(const asymbol **)b;
+
+ if (bfd_asymbol_value(as) != bfd_asymbol_value(bs))
+ return bfd_asymbol_value(as) - bfd_asymbol_value(bs);
+
+ return bfd_asymbol_name(as)[0] - bfd_asymbol_name(bs)[0];
+}
+
+static int bfd2elf_binding(asymbol *symbol)
+{
+ if (symbol->flags & BSF_WEAK)
+ return STB_WEAK;
+ if (symbol->flags & BSF_GLOBAL)
+ return STB_GLOBAL;
+ if (symbol->flags & BSF_LOCAL)
+ return STB_LOCAL;
+ return -1;
+}
+
+int dso__load_bfd_symbols(struct dso *dso, const char *debugfile)
+{
+ int err = -1;
+ long symbols_size, symbols_count;
+ asection *section;
+ asymbol **symbols, *sym;
+ struct symbol *symbol;
+ bfd *abfd;
+ u_int i;
+ u64 start, len;
+
+ abfd = bfd_openr(dso->long_name, NULL);
+ if (!abfd)
+ return -1;
+
+ if (!bfd_check_format(abfd, bfd_object)) {
+ pr_debug2("%s: cannot read %s bfd file.\n", __func__,
+ dso->long_name);
+ goto out_close;
+ }
+
+ if (bfd_get_flavour(abfd) == bfd_target_elf_flavour)
+ goto out_close;
+
+ section = bfd_get_section_by_name(abfd, ".text");
+ if (section)
+ dso->text_offset = section->vma - section->filepos;
+
+ bfd_close(abfd);
+
+ abfd = bfd_openr(debugfile, NULL);
+ if (!abfd)
+ return -1;
+
+ if (!bfd_check_format(abfd, bfd_object)) {
+ pr_debug2("%s: cannot read %s bfd file.\n", __func__,
+ debugfile);
+ goto out_close;
+ }
+
+ if (bfd_get_flavour(abfd) == bfd_target_elf_flavour)
+ goto out_close;
+
+ symbols_size = bfd_get_symtab_upper_bound(abfd);
+ if (symbols_size == 0) {
+ bfd_close(abfd);
+ return 0;
+ }
+
+ if (symbols_size < 0)
+ goto out_close;
+
+ symbols = malloc(symbols_size);
+ if (!symbols)
+ goto out_close;
+
+ symbols_count = bfd_canonicalize_symtab(abfd, symbols);
+ if (symbols_count < 0)
+ goto out_free;
+
+ qsort(symbols, symbols_count, sizeof(asymbol *), bfd_symbols__cmpvalue);
+
+#ifdef bfd_get_section
+#define bfd_asymbol_section bfd_get_section
+#endif
+ for (i = 0; i < symbols_count; ++i) {
+ sym = symbols[i];
+ section = bfd_asymbol_section(sym);
+ if (bfd2elf_binding(sym) < 0)
+ continue;
+
+ while (i + 1 < symbols_count &&
+ bfd_asymbol_section(symbols[i + 1]) == section &&
+ bfd2elf_binding(symbols[i + 1]) < 0)
+ i++;
+
+ if (i + 1 < symbols_count &&
+ bfd_asymbol_section(symbols[i + 1]) == section)
+ len = symbols[i + 1]->value - sym->value;
+ else
+ len = section->size - sym->value;
+
+ start = bfd_asymbol_value(sym) - dso->text_offset;
+ symbol = symbol__new(start, len, bfd2elf_binding(sym), STT_FUNC,
+ bfd_asymbol_name(sym));
+ if (!symbol)
+ goto out_free;
+
+ symbols__insert(&dso->symbols, symbol);
+ }
+#ifdef bfd_get_section
+#undef bfd_asymbol_section
+#endif
+
+ symbols__fixup_end(&dso->symbols);
+ symbols__fixup_duplicate(&dso->symbols);
+ dso->adjust_symbols = 1;
+
+ err = 0;
+out_free:
+ free(symbols);
+out_close:
+ bfd_close(abfd);
+ return err;
+}
+#endif
+
static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
enum dso_binary_type type)
{
@@ -1623,7 +1755,7 @@ int dso__load(struct dso *dso, struct map *map)
struct symsrc *syms_ss = NULL, *runtime_ss = NULL;
bool kmod;
bool perfmap;
- unsigned char build_id[BUILD_ID_SIZE];
+ struct build_id bid;
struct nscookie nsc;
char newmapname[PATH_MAX];
const char *map_path = dso->long_name;
@@ -1685,8 +1817,8 @@ int dso__load(struct dso *dso, struct map *map)
if (!dso->has_build_id &&
is_regular_file(dso->long_name)) {
__symbol__join_symfs(name, PATH_MAX, dso->long_name);
- if (filename__read_build_id(name, build_id, BUILD_ID_SIZE) > 0)
- dso__set_build_id(dso, build_id);
+ if (filename__read_build_id(name, &bid) > 0)
+ dso__set_build_id(dso, &bid);
}
/*
@@ -1699,6 +1831,7 @@ int dso__load(struct dso *dso, struct map *map)
bool next_slot = false;
bool is_reg;
bool nsexit;
+ int bfdrc = -1;
int sirc = -1;
enum dso_binary_type symtab_type = binary_type_symtab[i];
@@ -1717,12 +1850,19 @@ int dso__load(struct dso *dso, struct map *map)
nsinfo__mountns_exit(&nsc);
is_reg = is_regular_file(name);
+#ifdef HAVE_LIBBFD_SUPPORT
if (is_reg)
+ bfdrc = dso__load_bfd_symbols(dso, name);
+#endif
+ if (is_reg && bfdrc < 0)
sirc = symsrc__init(ss, dso, name, symtab_type);
if (nsexit)
nsinfo__mountns_enter(dso->nsinfo, &nsc);
+ if (bfdrc == 0)
+ break;
+
if (!is_reg || sirc < 0)
continue;
@@ -1982,7 +2122,7 @@ static bool filename__readable(const char *file)
static char *dso__find_kallsyms(struct dso *dso, struct map *map)
{
- u8 host_build_id[BUILD_ID_SIZE];
+ struct build_id bid;
char sbuild_id[SBUILD_ID_SIZE];
bool is_host = false;
char path[PATH_MAX];
@@ -1995,9 +2135,8 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map)
goto proc_kallsyms;
}
- if (sysfs__read_build_id("/sys/kernel/notes", host_build_id,
- sizeof(host_build_id)) == 0)
- is_host = dso__build_id_equal(dso, host_build_id);
+ if (sysfs__read_build_id("/sys/kernel/notes", &bid) == 0)
+ is_host = dso__build_id_equal(dso, &bid);
/* Try a fast path for /proc/kallsyms if possible */
if (is_host) {
@@ -2013,7 +2152,7 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map)
goto proc_kallsyms;
}
- build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+ build_id__sprintf(&dso->bid, sbuild_id);
/* Find kallsyms in build-id cache with kcore */
scnprintf(path, sizeof(path), "%s/%s/%s",
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 03e264a27cd3..f4801c488def 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -23,6 +23,7 @@ struct dso;
struct map;
struct maps;
struct option;
+struct build_id;
/*
* libelf 0.8.x and earlier do not support ELF_C_READ_MMAP;
@@ -142,8 +143,8 @@ struct symbol *dso__next_symbol(struct symbol *sym);
enum dso_type dso__type_fd(int fd);
-int filename__read_build_id(const char *filename, void *bf, size_t size);
-int sysfs__read_build_id(const char *filename, void *bf, size_t size);
+int filename__read_build_id(const char *filename, struct build_id *id);
+int sysfs__read_build_id(const char *filename, struct build_id *bid);
int modules__parse(const char *filename, void *arg,
int (*process_module)(void *arg, const char *name,
u64 start, u64 size));
@@ -175,6 +176,10 @@ int symbol__config_symfs(const struct option *opt __maybe_unused,
struct symsrc;
+#ifdef HAVE_LIBBFD_SUPPORT
+int dso__load_bfd_symbols(struct dso *dso, const char *debugfile);
+#endif
+
int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
struct symsrc *runtime_ss, int kmodule);
int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss);
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 89b390623b63..8a23391558cf 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -1961,7 +1961,7 @@ int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16
len = pos->long_name_len + 1;
len = PERF_ALIGN(len, NAME_ALIGN);
- memcpy(&ev.build_id.build_id, pos->build_id, sizeof(pos->build_id));
+ memcpy(&ev.build_id.build_id, pos->bid.data, sizeof(pos->bid.data));
ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID;
ev.build_id.header.misc = misc;
ev.build_id.pid = machine->pid;
@@ -2006,14 +2006,6 @@ int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct p
return 0;
}
-int __weak perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
- struct perf_tool *tool __maybe_unused,
- perf_event__handler_t process __maybe_unused,
- struct machine *machine __maybe_unused)
-{
- return 0;
-}
-
extern const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE];
int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session *session,
diff --git a/tools/perf/util/topdown.c b/tools/perf/util/topdown.c
new file mode 100644
index 000000000000..1081b20f9891
--- /dev/null
+++ b/tools/perf/util/topdown.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include "pmu.h"
+#include "topdown.h"
+
+int topdown_filter_events(const char **attr, char **str, bool use_group)
+{
+ int off = 0;
+ int i;
+ int len = 0;
+ char *s;
+
+ for (i = 0; attr[i]; i++) {
+ if (pmu_have_event("cpu", attr[i])) {
+ len += strlen(attr[i]) + 1;
+ attr[i - off] = attr[i];
+ } else
+ off++;
+ }
+ attr[i - off] = NULL;
+
+ *str = malloc(len + 1 + 2);
+ if (!*str)
+ return -1;
+ s = *str;
+ if (i - off == 0) {
+ *s = 0;
+ return 0;
+ }
+ if (use_group)
+ *s++ = '{';
+ for (i = 0; attr[i]; i++) {
+ strcpy(s, attr[i]);
+ s += strlen(s);
+ *s++ = ',';
+ }
+ if (use_group) {
+ s[-1] = '}';
+ *s = 0;
+ } else
+ s[-1] = 0;
+ return 0;
+}
+
+__weak bool arch_topdown_check_group(bool *warn)
+{
+ *warn = false;
+ return false;
+}
+
+__weak void arch_topdown_group_warn(void)
+{
+}
+
+__weak bool arch_topdown_sample_read(struct evsel *leader __maybe_unused)
+{
+ return false;
+}
diff --git a/tools/perf/util/topdown.h b/tools/perf/util/topdown.h
new file mode 100644
index 000000000000..2f0d0b887639
--- /dev/null
+++ b/tools/perf/util/topdown.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef TOPDOWN_H
+#define TOPDOWN_H 1
+#include "evsel.h"
+
+bool arch_topdown_check_group(bool *warn);
+void arch_topdown_group_warn(void);
+bool arch_topdown_sample_read(struct evsel *leader);
+
+int topdown_filter_events(const char **attr, char **str, bool use_group);
+
+#endif
diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c
index bfa782421cbd..62b4c75c966c 100644
--- a/tools/perf/util/tsc.c
+++ b/tools/perf/util/tsc.c
@@ -1,7 +1,16 @@
// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+
#include <linux/compiler.h>
+#include <linux/perf_event.h>
+#include <linux/stddef.h>
#include <linux/types.h>
+#include <asm/barrier.h>
+
+#include "event.h"
+#include "synthetic-events.h"
+#include "debug.h"
#include "tsc.h"
u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc)
@@ -19,12 +28,84 @@ u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc)
{
u64 quot, rem;
+ if (tc->cap_user_time_short)
+ cyc = tc->time_cycles +
+ ((cyc - tc->time_cycles) & tc->time_mask);
+
quot = cyc >> tc->time_shift;
rem = cyc & (((u64)1 << tc->time_shift) - 1);
return tc->time_zero + quot * tc->time_mult +
((rem * tc->time_mult) >> tc->time_shift);
}
+int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
+ struct perf_tsc_conversion *tc)
+{
+ u32 seq;
+ int i = 0;
+
+ while (1) {
+ seq = pc->lock;
+ rmb();
+ tc->time_mult = pc->time_mult;
+ tc->time_shift = pc->time_shift;
+ tc->time_zero = pc->time_zero;
+ tc->time_cycles = pc->time_cycles;
+ tc->time_mask = pc->time_mask;
+ tc->cap_user_time_zero = pc->cap_user_time_zero;
+ tc->cap_user_time_short = pc->cap_user_time_short;
+ rmb();
+ if (pc->lock == seq && !(seq & 1))
+ break;
+ if (++i > 10000) {
+ pr_debug("failed to get perf_event_mmap_page lock\n");
+ return -EINVAL;
+ }
+ }
+
+ if (!tc->cap_user_time_zero)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
+ struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ union perf_event event = {
+ .time_conv = {
+ .header = {
+ .type = PERF_RECORD_TIME_CONV,
+ .size = sizeof(struct perf_record_time_conv),
+ },
+ },
+ };
+ struct perf_tsc_conversion tc;
+ int err;
+
+ if (!pc)
+ return 0;
+ err = perf_read_tsc_conversion(pc, &tc);
+ if (err == -EOPNOTSUPP)
+ return 0;
+ if (err)
+ return err;
+
+ pr_debug2("Synthesizing TSC conversion information\n");
+
+ event.time_conv.time_mult = tc.time_mult;
+ event.time_conv.time_shift = tc.time_shift;
+ event.time_conv.time_zero = tc.time_zero;
+ event.time_conv.time_cycles = tc.time_cycles;
+ event.time_conv.time_mask = tc.time_mask;
+ event.time_conv.cap_user_time_zero = tc.cap_user_time_zero;
+ event.time_conv.cap_user_time_short = tc.cap_user_time_short;
+
+ return process(tool, &event, NULL, machine);
+}
+
u64 __weak rdtsc(void)
{
return 0;
diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h
index 3c5a632ee57c..72a15419f3b3 100644
--- a/tools/perf/util/tsc.h
+++ b/tools/perf/util/tsc.h
@@ -8,6 +8,11 @@ struct perf_tsc_conversion {
u16 time_shift;
u32 time_mult;
u64 time_zero;
+ u64 time_cycles;
+ u64 time_mask;
+
+ bool cap_user_time_zero;
+ bool cap_user_time_short;
};
struct perf_event_mmap_page;
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index f486fdd3a538..ad737052e597 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -62,4 +62,10 @@ char *perf_exe(char *buf, int len);
#endif
#endif
+extern bool test_attr__enabled;
+void test_attr__ready(void);
+void test_attr__init(void);
+struct perf_event_attr;
+void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
+ int fd, int group_fd, unsigned long flags);
#endif /* GIT_COMPAT_UTIL_H */
diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index f13e0c0d6663..8019e3dd4c32 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -45,10 +45,11 @@ class TestStatus(Enum):
FAILURE = auto()
TEST_CRASHED = auto()
NO_TESTS = auto()
+ FAILURE_TO_PARSE_TESTS = auto()
kunit_start_re = re.compile(r'TAP version [0-9]+$')
kunit_end_re = re.compile('(List of all partitions:|'
- 'Kernel panic - not syncing: VFS:|reboot: System halted)')
+ 'Kernel panic - not syncing: VFS:)')
def isolate_kunit_output(kernel_output):
started = False
@@ -109,7 +110,7 @@ OkNotOkResult = namedtuple('OkNotOkResult', ['is_ok','description', 'text'])
OK_NOT_OK_SUBTEST = re.compile(r'^[\s]+(ok|not ok) [0-9]+ - (.*)$')
-OK_NOT_OK_MODULE = re.compile(r'^(ok|not ok) [0-9]+ - (.*)$')
+OK_NOT_OK_MODULE = re.compile(r'^(ok|not ok) ([0-9]+) - (.*)$')
def parse_ok_not_ok_test_case(lines: List[str], test_case: TestCase) -> bool:
save_non_diagnositic(lines, test_case)
@@ -197,7 +198,9 @@ def max_status(left: TestStatus, right: TestStatus) -> TestStatus:
else:
return TestStatus.SUCCESS
-def parse_ok_not_ok_test_suite(lines: List[str], test_suite: TestSuite) -> bool:
+def parse_ok_not_ok_test_suite(lines: List[str],
+ test_suite: TestSuite,
+ expected_suite_index: int) -> bool:
consume_non_diagnositic(lines)
if not lines:
test_suite.status = TestStatus.TEST_CRASHED
@@ -210,6 +213,12 @@ def parse_ok_not_ok_test_suite(lines: List[str], test_suite: TestSuite) -> bool:
test_suite.status = TestStatus.SUCCESS
else:
test_suite.status = TestStatus.FAILURE
+ suite_index = int(match.group(2))
+ if suite_index != expected_suite_index:
+ print_with_timestamp(
+ red('[ERROR] ') + 'expected_suite_index ' +
+ str(expected_suite_index) + ', but got ' +
+ str(suite_index))
return True
else:
return False
@@ -222,7 +231,7 @@ def bubble_up_test_case_errors(test_suite: TestSuite) -> TestStatus:
max_test_case_status = bubble_up_errors(lambda x: x.status, test_suite.cases)
return max_status(max_test_case_status, test_suite.status)
-def parse_test_suite(lines: List[str]) -> TestSuite:
+def parse_test_suite(lines: List[str], expected_suite_index: int) -> TestSuite:
if not lines:
return None
consume_non_diagnositic(lines)
@@ -241,7 +250,7 @@ def parse_test_suite(lines: List[str]) -> TestSuite:
break
test_suite.cases.append(test_case)
expected_test_case_num -= 1
- if parse_ok_not_ok_test_suite(lines, test_suite):
+ if parse_ok_not_ok_test_suite(lines, test_suite, expected_suite_index):
test_suite.status = bubble_up_test_case_errors(test_suite)
return test_suite
elif not lines:
@@ -261,6 +270,17 @@ def parse_tap_header(lines: List[str]) -> bool:
else:
return False
+TEST_PLAN = re.compile(r'[0-9]+\.\.([0-9]+)')
+
+def parse_test_plan(lines: List[str]) -> int:
+ consume_non_diagnositic(lines)
+ match = TEST_PLAN.match(lines[0])
+ if match:
+ lines.pop(0)
+ return int(match.group(1))
+ else:
+ return None
+
def bubble_up_suite_errors(test_suite_list: List[TestSuite]) -> TestStatus:
return bubble_up_errors(lambda x: x.status, test_suite_list)
@@ -268,20 +288,33 @@ def parse_test_result(lines: List[str]) -> TestResult:
consume_non_diagnositic(lines)
if not lines or not parse_tap_header(lines):
return TestResult(TestStatus.NO_TESTS, [], lines)
+ expected_test_suite_num = parse_test_plan(lines)
+ if not expected_test_suite_num:
+ return TestResult(TestStatus.FAILURE_TO_PARSE_TESTS, [], lines)
test_suites = []
- test_suite = parse_test_suite(lines)
- while test_suite:
- test_suites.append(test_suite)
- test_suite = parse_test_suite(lines)
- return TestResult(bubble_up_suite_errors(test_suites), test_suites, lines)
+ for i in range(1, expected_test_suite_num + 1):
+ test_suite = parse_test_suite(lines, i)
+ if test_suite:
+ test_suites.append(test_suite)
+ else:
+ print_with_timestamp(
+ red('[ERROR] ') + ' expected ' +
+ str(expected_test_suite_num) +
+ ' test suites, but got ' + str(i - 2))
+ break
+ test_suite = parse_test_suite(lines, -1)
+ if test_suite:
+ print_with_timestamp(red('[ERROR] ') +
+ 'got unexpected test suite: ' + test_suite.name)
+ if test_suites:
+ return TestResult(bubble_up_suite_errors(test_suites), test_suites, lines)
+ else:
+ return TestResult(TestStatus.NO_TESTS, [], lines)
-def parse_run_tests(kernel_output) -> TestResult:
+def print_and_count_results(test_result: TestResult) -> None:
total_tests = 0
failed_tests = 0
crashed_tests = 0
- test_result = parse_test_result(list(isolate_kunit_output(kernel_output)))
- if test_result.status == TestStatus.NO_TESTS:
- print_with_timestamp(red('[ERROR] ') + 'no kunit output detected')
for test_suite in test_result.suites:
if test_suite.status == TestStatus.SUCCESS:
print_suite_divider(green('[PASSED] ') + test_suite.name)
@@ -303,6 +336,21 @@ def parse_run_tests(kernel_output) -> TestResult:
print_with_timestamp(red('[FAILED] ') + test_case.name)
print_log(map(yellow, test_case.log))
print_with_timestamp('')
+ return total_tests, failed_tests, crashed_tests
+
+def parse_run_tests(kernel_output) -> TestResult:
+ total_tests = 0
+ failed_tests = 0
+ crashed_tests = 0
+ test_result = parse_test_result(list(isolate_kunit_output(kernel_output)))
+ if test_result.status == TestStatus.NO_TESTS:
+ print(red('[ERROR] ') + yellow('no tests run!'))
+ elif test_result.status == TestStatus.FAILURE_TO_PARSE_TESTS:
+ print(red('[ERROR] ') + yellow('could not parse test results!'))
+ else:
+ (total_tests,
+ failed_tests,
+ crashed_tests) = print_and_count_results(test_result)
print_with_timestamp(DIVIDER)
fmt = green if test_result.status == TestStatus.SUCCESS else red
print_with_timestamp(
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log b/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log
index 62ebc0288355..bc0dc8fe35b7 100644
--- a/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log
+++ b/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log
@@ -1,4 +1,5 @@
TAP version 14
+1..2
# Subtest: sysctl_test
1..8
# sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-crash.log b/tools/testing/kunit/test_data/test_is_test_passed-crash.log
index 0b249870c8be..4d97f6708c4a 100644
--- a/tools/testing/kunit/test_data/test_is_test_passed-crash.log
+++ b/tools/testing/kunit/test_data/test_is_test_passed-crash.log
@@ -1,6 +1,7 @@
printk: console [tty0] enabled
printk: console [mc-1] enabled
TAP version 14
+1..2
# Subtest: sysctl_test
1..8
# sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-failure.log b/tools/testing/kunit/test_data/test_is_test_passed-failure.log
index 9e89d32d5667..7a416497e3be 100644
--- a/tools/testing/kunit/test_data/test_is_test_passed-failure.log
+++ b/tools/testing/kunit/test_data/test_is_test_passed-failure.log
@@ -1,4 +1,5 @@
TAP version 14
+1..2
# Subtest: sysctl_test
1..8
# sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 8995092d541e..3b796dd5e577 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -523,8 +523,27 @@ static void *ida_random_fn(void *arg)
return NULL;
}
+static void *ida_leak_fn(void *arg)
+{
+ struct ida *ida = arg;
+ time_t s = time(NULL);
+ int i, ret;
+
+ rcu_register_thread();
+
+ do for (i = 0; i < 1000; i++) {
+ ret = ida_alloc_range(ida, 128, 128, GFP_KERNEL);
+ if (ret >= 0)
+ ida_free(ida, 128);
+ } while (time(NULL) < s + 2);
+
+ rcu_unregister_thread();
+ return NULL;
+}
+
void ida_thread_tests(void)
{
+ DEFINE_IDA(ida);
pthread_t threads[20];
int i;
@@ -536,6 +555,16 @@ void ida_thread_tests(void)
while (i--)
pthread_join(threads[i], NULL);
+
+ for (i = 0; i < ARRAY_SIZE(threads); i++)
+ if (pthread_create(&threads[i], NULL, ida_leak_fn, &ida)) {
+ perror("creating ida thread");
+ exit(1);
+ }
+
+ while (i--)
+ pthread_join(threads[i], NULL);
+ assert(ida_is_empty(&ida));
}
void ida_tests(void)
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
index 4568248222ae..39867fd80c8f 100644
--- a/tools/testing/radix-tree/linux/kernel.h
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -22,4 +22,5 @@
#define __releases(x)
#define __must_hold(x)
+#define EXPORT_PER_CPU_SYMBOL_GPL(x)
#endif /* _KERNEL_H */
diff --git a/tools/testing/radix-tree/linux/local_lock.h b/tools/testing/radix-tree/linux/local_lock.h
new file mode 100644
index 000000000000..b3cf8b233ca4
--- /dev/null
+++ b/tools/testing/radix-tree/linux/local_lock.h
@@ -0,0 +1,8 @@
+#ifndef _LINUX_LOCAL_LOCK
+#define _LINUX_LOCAL_LOCK
+typedef struct { } local_lock_t;
+
+static inline void local_lock(local_lock_t *lock) { }
+static inline void local_unlock(local_lock_t *lock) { }
+#define INIT_LOCAL_LOCK(x) { }
+#endif
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index 34dab4d18744..7ef7067e942c 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -56,8 +56,4 @@ int root_tag_get(struct radix_tree_root *root, unsigned int tag);
unsigned long node_maxindex(struct radix_tree_node *);
unsigned long shift_maxindex(unsigned int shift);
int radix_tree_cpu_dead(unsigned int cpu);
-struct radix_tree_preload {
- unsigned nr;
- struct radix_tree_node *nodes;
-};
extern struct radix_tree_preload radix_tree_preloads;
diff --git a/tools/testing/scatterlist/Makefile b/tools/testing/scatterlist/Makefile
index cbb003d9305e..c65233876622 100644
--- a/tools/testing/scatterlist/Makefile
+++ b/tools/testing/scatterlist/Makefile
@@ -14,7 +14,7 @@ targets: include $(TARGETS)
main: $(OFILES)
clean:
- $(RM) $(TARGETS) $(OFILES) scatterlist.c linux/scatterlist.h linux/highmem.h linux/kmemleak.h asm/io.h
+ $(RM) $(TARGETS) $(OFILES) scatterlist.c linux/scatterlist.h linux/highmem.h linux/kmemleak.h linux/slab.h asm/io.h
@rmdir asm
scatterlist.c: ../../../lib/scatterlist.c
@@ -28,4 +28,5 @@ include: ../../../include/linux/scatterlist.h
@touch asm/io.h
@touch linux/highmem.h
@touch linux/kmemleak.h
+ @touch linux/slab.h
@cp $< linux/scatterlist.h
diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h
index 6f9ac14aa800..6ae907f375d2 100644
--- a/tools/testing/scatterlist/linux/mm.h
+++ b/tools/testing/scatterlist/linux/mm.h
@@ -114,6 +114,12 @@ static inline void *kmalloc(unsigned int size, unsigned int flags)
return malloc(size);
}
+static inline void *
+kmalloc_array(unsigned int n, unsigned int size, unsigned int flags)
+{
+ return malloc(n * size);
+}
+
#define kfree(x) free(x)
#define kmemleak_alloc(a, b, c, d)
@@ -122,4 +128,33 @@ static inline void *kmalloc(unsigned int size, unsigned int flags)
#define PageSlab(p) (0)
#define flush_kernel_dcache_page(p)
+#define MAX_ERRNO 4095
+
+#define IS_ERR_VALUE(x) unlikely((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO)
+
+static inline void * __must_check ERR_PTR(long error)
+{
+ return (void *) error;
+}
+
+static inline long __must_check PTR_ERR(__force const void *ptr)
+{
+ return (long) ptr;
+}
+
+static inline bool __must_check IS_ERR(__force const void *ptr)
+{
+ return IS_ERR_VALUE((unsigned long)ptr);
+}
+
+static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr)
+{
+ if (IS_ERR(ptr))
+ return PTR_ERR(ptr);
+ else
+ return 0;
+}
+
+#define IS_ENABLED(x) (0)
+
#endif
diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c
index 0a1464181226..b2c7e9f7b8d3 100644
--- a/tools/testing/scatterlist/main.c
+++ b/tools/testing/scatterlist/main.c
@@ -5,6 +5,15 @@
#define MAX_PAGES (64)
+struct test {
+ int alloc_ret;
+ unsigned num_pages;
+ unsigned *pfn;
+ unsigned size;
+ unsigned int max_seg;
+ unsigned int expected_segments;
+};
+
static void set_pages(struct page **pages, const unsigned *array, unsigned num)
{
unsigned int i;
@@ -17,17 +26,32 @@ static void set_pages(struct page **pages, const unsigned *array, unsigned num)
#define pfn(...) (unsigned []){ __VA_ARGS__ }
+static void fail(struct test *test, struct sg_table *st, const char *cond)
+{
+ unsigned int i;
+
+ fprintf(stderr, "Failed on '%s'!\n\n", cond);
+
+ printf("size = %u, max segment = %u, expected nents = %u\nst->nents = %u, st->orig_nents= %u\n",
+ test->size, test->max_seg, test->expected_segments, st->nents,
+ st->orig_nents);
+
+ printf("%u input PFNs:", test->num_pages);
+ for (i = 0; i < test->num_pages; i++)
+ printf(" %x", test->pfn[i]);
+ printf("\n");
+
+ exit(1);
+}
+
+#define VALIDATE(cond, st, test) \
+ if (!(cond)) \
+ fail((test), (st), #cond);
+
int main(void)
{
const unsigned int sgmax = SCATTERLIST_MAX_SEGMENT;
- struct test {
- int alloc_ret;
- unsigned num_pages;
- unsigned *pfn;
- unsigned size;
- unsigned int max_seg;
- unsigned int expected_segments;
- } *test, tests[] = {
+ struct test *test, tests[] = {
{ -EINVAL, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 },
{ -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 },
{ -EINVAL, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 },
@@ -55,20 +79,19 @@ int main(void)
for (i = 0, test = tests; test->expected_segments; test++, i++) {
struct page *pages[MAX_PAGES];
struct sg_table st;
- int ret;
+ struct scatterlist *sg;
set_pages(pages, test->pfn, test->num_pages);
- ret = __sg_alloc_table_from_pages(&st, pages, test->num_pages,
- 0, test->size, test->max_seg,
- GFP_KERNEL);
- assert(ret == test->alloc_ret);
+ sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0,
+ test->size, test->max_seg, NULL, 0, GFP_KERNEL);
+ assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret);
if (test->alloc_ret)
continue;
- assert(st.nents == test->expected_segments);
- assert(st.orig_nents == test->expected_segments);
+ VALIDATE(st.nents == test->expected_segments, &st, test);
+ VALIDATE(st.orig_nents == test->expected_segments, &st, test);
sg_free_table(&st);
}
diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore
index 344a99c6da1b..9e2f00343f15 100644
--- a/tools/testing/selftests/exec/.gitignore
+++ b/tools/testing/selftests/exec/.gitignore
@@ -7,6 +7,7 @@ execveat.moved
execveat.path.ephemeral
execveat.ephemeral
execveat.denatured
+/load_address_*
/recursion-depth
xxxxxxxx*
pipe
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index 0a13b110c1e6..cf69b2fcce59 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -4,7 +4,7 @@ CFLAGS += -Wno-nonnull
CFLAGS += -D_GNU_SOURCE
TEST_PROGS := binfmt_script non-regular
-TEST_GEN_PROGS := execveat
+TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216
TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir pipe
# Makefile is a run-time dependency, since it's accessed by the execveat test
TEST_FILES := Makefile
@@ -27,4 +27,9 @@ $(OUTPUT)/execveat.symlink: $(OUTPUT)/execveat
$(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat
cp $< $@
chmod -x $@
-
+$(OUTPUT)/load_address_4096: load_address.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie $< -o $@
+$(OUTPUT)/load_address_2097152: load_address.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie $< -o $@
+$(OUTPUT)/load_address_16777216: load_address.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie $< -o $@
diff --git a/tools/testing/selftests/exec/load_address.c b/tools/testing/selftests/exec/load_address.c
new file mode 100644
index 000000000000..d487c2f6a615
--- /dev/null
+++ b/tools/testing/selftests/exec/load_address.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <link.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+struct Statistics {
+ unsigned long long load_address;
+ unsigned long long alignment;
+};
+
+int ExtractStatistics(struct dl_phdr_info *info, size_t size, void *data)
+{
+ struct Statistics *stats = (struct Statistics *) data;
+ int i;
+
+ if (info->dlpi_name != NULL && info->dlpi_name[0] != '\0') {
+ // Ignore headers from other than the executable.
+ return 2;
+ }
+
+ stats->load_address = (unsigned long long) info->dlpi_addr;
+ stats->alignment = 0;
+
+ for (i = 0; i < info->dlpi_phnum; i++) {
+ if (info->dlpi_phdr[i].p_type != PT_LOAD)
+ continue;
+
+ if (info->dlpi_phdr[i].p_align > stats->alignment)
+ stats->alignment = info->dlpi_phdr[i].p_align;
+ }
+
+ return 1; // Terminate dl_iterate_phdr.
+}
+
+int main(int argc, char **argv)
+{
+ struct Statistics extracted;
+ unsigned long long misalign;
+ int ret;
+
+ ret = dl_iterate_phdr(ExtractStatistics, &extracted);
+ if (ret != 1) {
+ fprintf(stderr, "FAILED\n");
+ return 1;
+ }
+
+ if (extracted.alignment == 0) {
+ fprintf(stderr, "No alignment found\n");
+ return 1;
+ } else if (extracted.alignment & (extracted.alignment - 1)) {
+ fprintf(stderr, "Alignment is not a power of 2\n");
+ return 1;
+ }
+
+ misalign = extracted.load_address & (extracted.alignment - 1);
+ if (misalign) {
+ printf("alignment = %llu, load_address = %llu\n",
+ extracted.alignment, extracted.load_address);
+ fprintf(stderr, "FAILED\n");
+ return 1;
+ }
+
+ fprintf(stderr, "PASS\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
index 55ef15184057..2a0503bc7e49 100644
--- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c
+++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
@@ -55,8 +55,6 @@
#include <setjmp.h>
#include <signal.h>
-#include <asm/cputable.h>
-
#include "utils.h"
#include "instructions.h"
@@ -64,6 +62,7 @@ int bufsize;
int debug;
int testing;
volatile int gotsig;
+bool prefixes_enabled;
char *cipath = "/dev/fb0";
long cioffset;
@@ -77,7 +76,12 @@ void sighandler(int sig, siginfo_t *info, void *ctx)
}
gotsig = sig;
#ifdef __powerpc64__
- ucp->uc_mcontext.gp_regs[PT_NIP] += 4;
+ if (prefixes_enabled) {
+ u32 inst = *(u32 *)ucp->uc_mcontext.gp_regs[PT_NIP];
+ ucp->uc_mcontext.gp_regs[PT_NIP] += ((inst >> 26 == 1) ? 8 : 4);
+ } else {
+ ucp->uc_mcontext.gp_regs[PT_NIP] += 4;
+ }
#else
ucp->uc_mcontext.uc_regs->gregs[PT_NIP] += 4;
#endif
@@ -648,6 +652,8 @@ int main(int argc, char *argv[])
exit(1);
}
+ prefixes_enabled = have_hwcap2(PPC_FEATURE2_ARCH_3_1);
+
rc |= test_harness(test_alignment_handler_vsx_206,
"test_alignment_handler_vsx_206");
rc |= test_harness(test_alignment_handler_vsx_207,
diff --git a/tools/testing/selftests/powerpc/benchmarks/context_switch.c b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
index d50cc05df495..96554e2794d1 100644
--- a/tools/testing/selftests/powerpc/benchmarks/context_switch.c
+++ b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
@@ -481,6 +481,12 @@ int main(int argc, char *argv[])
else
printf("futex");
+ if (!have_hwcap(PPC_FEATURE_HAS_ALTIVEC))
+ touch_altivec = 0;
+
+ if (!have_hwcap(PPC_FEATURE_HAS_VSX))
+ touch_vector = 0;
+
printf(" on cpus %d/%d touching FP:%s altivec:%s vector:%s vdso:%s\n",
cpu1, cpu2, touch_fp ? "yes" : "no", touch_altivec ? "yes" : "no",
touch_vector ? "yes" : "no", touch_vdso ? "yes" : "no");
diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile
index cfa6eedcb66c..845db6273a1b 100644
--- a/tools/testing/selftests/powerpc/dscr/Makefile
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -10,4 +10,4 @@ include ../../lib.mk
$(OUTPUT)/dscr_default_test: LDLIBS += -lpthread
-$(TEST_GEN_PROGS): ../harness.c
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
index 288a4e2ad156..e76611e608af 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
@@ -63,6 +63,8 @@ int dscr_default(void)
unsigned long i, *status[THREADS];
unsigned long orig_dscr_default;
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
orig_dscr_default = get_default_dscr();
/* Initial DSCR default */
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
index aefcd8d8759b..32fcf2b324b1 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
@@ -21,6 +21,8 @@ int dscr_explicit(void)
{
unsigned long i, dscr = 0;
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
srand(getpid());
set_dscr(dscr);
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
index 7c1cb46397c6..c6a81b2d6b91 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
@@ -44,6 +44,8 @@ int dscr_inherit_exec(void)
unsigned long i, dscr = 0;
pid_t pid;
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
for (i = 0; i < COUNT; i++) {
dscr++;
if (dscr > DSCR_MAX)
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
index 04297a69ab59..f9dfd3d3c2d5 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
@@ -22,6 +22,8 @@ int dscr_inherit(void)
unsigned long i, dscr = 0;
pid_t pid;
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
srand(getpid());
set_dscr(dscr);
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
index 02f6b4efde14..fbbdffdb2e5d 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
@@ -77,6 +77,8 @@ int dscr_sysfs(void)
unsigned long orig_dscr_default;
int i, j;
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
orig_dscr_default = get_default_dscr();
for (i = 0; i < COUNT; i++) {
for (j = 0; j < DSCR_MAX; j++) {
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
index 37be2c25f277..191ed126f118 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
@@ -56,6 +56,8 @@ int dscr_sysfs_thread(void)
unsigned long orig_dscr_default;
int i, j;
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
orig_dscr_default = get_default_dscr();
for (i = 0; i < COUNT; i++) {
for (j = 0; j < DSCR_MAX; j++) {
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
index eaf785d11eed..e09072446dd3 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
@@ -36,6 +36,8 @@ int dscr_user(void)
{
int i;
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
check_dscr("");
for (i = 0; i < COUNT; i++) {
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
index 8a8d0f456946..0d783e1065c8 100755
--- a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
+++ b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
@@ -1,17 +1,19 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only
+KSELFTESTS_SKIP=4
+
. ./eeh-functions.sh
if ! eeh_supported ; then
echo "EEH not supported on this system, skipping"
- exit 0;
+ exit $KSELFTESTS_SKIP;
fi
if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] && \
[ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then
echo "debugfs EEH testing files are missing. Is debugfs mounted?"
- exit 1;
+ exit $KSELFTESTS_SKIP;
fi
pre_lspci=`mktemp`
@@ -84,4 +86,5 @@ echo "$failed devices failed to recover ($dev_count tested)"
lspci | diff -u $pre_lspci -
rm -f $pre_lspci
-exit $failed
+test "$failed" == 0
+exit $?
diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h
index 71d2924f5b8b..052b5a775dc2 100644
--- a/tools/testing/selftests/powerpc/include/utils.h
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -12,6 +12,7 @@
#include <stdbool.h>
#include <linux/auxvec.h>
#include <linux/perf_event.h>
+#include <asm/cputable.h>
#include "reg.h"
/* Avoid headaches with PRI?64 - just use %ll? always */
@@ -35,7 +36,6 @@ int pick_online_cpu(void);
int read_debugfs_file(char *debugfs_file, int *result);
int write_debugfs_file(char *debugfs_file, int result);
int read_sysfs_file(char *debugfs_file, char *result, size_t result_size);
-void set_dscr(unsigned long val);
int perf_event_open_counter(unsigned int type,
unsigned long config, int group_fd);
int perf_event_enable(int fd);
diff --git a/tools/testing/selftests/powerpc/mm/bad_accesses.c b/tools/testing/selftests/powerpc/mm/bad_accesses.c
index a864ed7e2008..fd747b2ffcfc 100644
--- a/tools/testing/selftests/powerpc/mm/bad_accesses.c
+++ b/tools/testing/selftests/powerpc/mm/bad_accesses.c
@@ -139,5 +139,6 @@ static int test(void)
int main(void)
{
+ test_harness_set_timeout(300);
return test_harness(test, "bad_accesses");
}
diff --git a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
index 2980abca31e0..2070a1e2b3a5 100644
--- a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
+++ b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
@@ -9,7 +9,6 @@
#include <stdbool.h>
#include <string.h>
#include <sys/prctl.h>
-#include <asm/cputable.h>
#include "event.h"
#include "utils.h"
diff --git a/tools/testing/selftests/powerpc/pmu/l3_bank_test.c b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
index a96d512a18c4..a5dfa9bf3b9f 100644
--- a/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
+++ b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
@@ -20,6 +20,9 @@ static int l3_bank_test(void)
char *p;
int i;
+ // The L3 bank logic is only used on Power8 or later
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
p = malloc(MALLOC_SIZE);
FAIL_IF(!p);
diff --git a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
index 2d37942bf72b..ad32a09a6540 100644
--- a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
+++ b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
@@ -12,8 +12,6 @@
#include <string.h>
#include <sys/prctl.h>
-#include <asm/cputable.h>
-
#include "event.h"
#include "lib.h"
#include "utils.h"
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
index fc477dfe86a2..2e0d86e0687e 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
@@ -20,6 +20,8 @@
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <linux/limits.h>
#include "ptrace.h"
#define SPRN_PVR 0x11F
@@ -44,6 +46,7 @@ struct gstruct {
};
static volatile struct gstruct gstruct __attribute__((aligned(512)));
+static volatile char cwd[PATH_MAX] __attribute__((aligned(8)));
static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo)
{
@@ -138,6 +141,9 @@ static void test_workload(void)
write_var(len);
}
+ /* PTRACE_SET_DEBUGREG, Kernel Access Userspace test */
+ syscall(__NR_getcwd, &cwd, PATH_MAX);
+
/* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, WO test */
write_var(1);
@@ -150,6 +156,9 @@ static void test_workload(void)
else
read_var(1);
+ /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, Kernel Access Userspace test */
+ syscall(__NR_getcwd, &cwd, PATH_MAX);
+
/* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, WO test */
gstruct.a[rand() % A_LEN] = 'a';
@@ -293,6 +302,24 @@ static int test_set_debugreg(pid_t child_pid)
return 0;
}
+static int test_set_debugreg_kernel_userspace(pid_t child_pid)
+{
+ unsigned long wp_addr = (unsigned long)cwd;
+ char *name = "PTRACE_SET_DEBUGREG";
+
+ /* PTRACE_SET_DEBUGREG, Kernel Access Userspace test */
+ wp_addr &= ~0x7UL;
+ wp_addr |= (1Ul << DABR_READ_SHIFT);
+ wp_addr |= (1UL << DABR_WRITE_SHIFT);
+ wp_addr |= (1UL << DABR_TRANSLATION_SHIFT);
+ ptrace_set_debugreg(child_pid, wp_addr);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name, "Kernel Access Userspace", wp_addr, 8);
+
+ ptrace_set_debugreg(child_pid, 0);
+ return 0;
+}
+
static void get_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type,
unsigned long addr, int len)
{
@@ -338,6 +365,22 @@ static void test_sethwdebug_exact(pid_t child_pid)
ptrace_delhwdebug(child_pid, wh);
}
+static void test_sethwdebug_exact_kernel_userspace(pid_t child_pid)
+{
+ struct ppc_hw_breakpoint info;
+ unsigned long wp_addr = (unsigned long)&cwd;
+ char *name = "PPC_PTRACE_SETHWDEBUG, MODE_EXACT";
+ int len = 1; /* hardcoded in kernel */
+ int wh;
+
+ /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, Kernel Access Userspace test */
+ get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, 0);
+ wh = ptrace_sethwdebug(child_pid, &info);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name, "Kernel Access Userspace", wp_addr, len);
+ ptrace_delhwdebug(child_pid, wh);
+}
+
static void test_sethwdebug_range_aligned(pid_t child_pid)
{
struct ppc_hw_breakpoint info;
@@ -452,9 +495,10 @@ static void
run_tests(pid_t child_pid, struct ppc_debug_info *dbginfo, bool dawr)
{
test_set_debugreg(child_pid);
+ test_set_debugreg_kernel_userspace(child_pid);
+ test_sethwdebug_exact(child_pid);
+ test_sethwdebug_exact_kernel_userspace(child_pid);
if (dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE) {
- test_sethwdebug_exact(child_pid);
-
test_sethwdebug_range_aligned(child_pid);
if (dawr || is_8xx) {
test_sethwdebug_range_unaligned(child_pid);
diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c
index 0a7d0afb26b8..93a65bd1f231 100644
--- a/tools/testing/selftests/powerpc/security/rfi_flush.c
+++ b/tools/testing/selftests/powerpc/security/rfi_flush.c
@@ -10,6 +10,7 @@
#include <stdint.h>
#include <malloc.h>
#include <unistd.h>
+#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
@@ -41,6 +42,40 @@ static void syscall_loop(char *p, unsigned long iterations,
}
}
+static void sigill_handler(int signr, siginfo_t *info, void *unused)
+{
+ static int warned = 0;
+ ucontext_t *ctx = (ucontext_t *)unused;
+ unsigned long *pc = &UCONTEXT_NIA(ctx);
+
+ /* mtspr 3,RS to check for move to DSCR below */
+ if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
+ if (!warned++)
+ printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
+ *pc += 4;
+ } else {
+ printf("SIGILL at %p\n", pc);
+ abort();
+ }
+}
+
+static void set_dscr(unsigned long val)
+{
+ static int init = 0;
+ struct sigaction sa;
+
+ if (!init) {
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = sigill_handler;
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGILL, &sa, NULL))
+ perror("sigill_handler");
+ init = 1;
+ }
+
+ asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+}
+
int rfi_flush_test(void)
{
char *p;
@@ -54,6 +89,9 @@ int rfi_flush_test(void)
SKIP_IF(geteuid() != 0);
+ // The PMU event we use only works on Power7 or later
+ SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_org)) {
perror("Unable to read powerpc/rfi_flush debugfs file");
SKIP_IF(1);
diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c
index c8d82b784102..adc2b7294e5f 100644
--- a/tools/testing/selftests/powerpc/security/spectre_v2.c
+++ b/tools/testing/selftests/powerpc/security/spectre_v2.c
@@ -134,6 +134,9 @@ int spectre_v2_test(void)
s64 miss_percent;
bool is_p9;
+ // The PMU events we use only work on Power8 or later
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
state = get_sysfs_state();
if (state == UNKNOWN) {
printf("Error: couldn't determine spectre_v2 mitigation state?\n");
diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c
index 979df3d98368..cb2f18855c8d 100644
--- a/tools/testing/selftests/powerpc/stringloops/memcmp.c
+++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c
@@ -4,7 +4,7 @@
#include <string.h>
#include <sys/mman.h>
#include <time.h>
-#include <asm/cputable.h>
+
#include "utils.h"
#define SIZE 256
diff --git a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
index cc4930467235..7887f78cf072 100644
--- a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
+++ b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
@@ -3,9 +3,13 @@
.data
.balign 8
-message:
+success_message:
.ascii "success: switch_endian_test\n\0"
+ .balign 8
+failure_message:
+ .ascii "failure: switch_endian_test\n\0"
+
.section ".toc"
.balign 8
pattern:
@@ -64,6 +68,9 @@ FUNC_START(_start)
li r0, __NR_switch_endian
sc
+ tdi 0, 0, 0x48 // b +8 if the endian was switched
+ b .Lfail // exit if endian didn't switch
+
#include "check-reversed.S"
/* Flip back, r0 already has the switch syscall number */
@@ -71,12 +78,20 @@ FUNC_START(_start)
#include "check.S"
+ ld r4, success_message@got(%r2)
+ li r5, 28 // strlen(success_message)
+ li r14, 0 // exit status
+.Lout:
li r0, __NR_write
li r3, 1 /* stdout */
- ld r4, message@got(%r2)
- li r5, 28 /* strlen(message3) */
sc
li r0, __NR_exit
- li r3, 0
+ mr r3, r14
sc
b .
+
+.Lfail:
+ ld r4, failure_message@got(%r2)
+ li r5, 28 // strlen(failure_message)
+ li r14, 1
+ b .Lout
diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile b/tools/testing/selftests/powerpc/syscalls/Makefile
index 01b22775ca87..b63f8459c704 100644
--- a/tools/testing/selftests/powerpc/syscalls/Makefile
+++ b/tools/testing/selftests/powerpc/syscalls/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-TEST_GEN_PROGS := ipc_unmuxed
+TEST_GEN_PROGS := ipc_unmuxed rtas_filter
CFLAGS += -I../../../../../usr/include
diff --git a/tools/testing/selftests/powerpc/syscalls/rtas_filter.c b/tools/testing/selftests/powerpc/syscalls/rtas_filter.c
new file mode 100644
index 000000000000..03b487f18d00
--- /dev/null
+++ b/tools/testing/selftests/powerpc/syscalls/rtas_filter.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2005-2020 IBM Corporation.
+ *
+ * Includes code from librtas (https://github.com/ibm-power-utilities/librtas/)
+ */
+
+#include <byteswap.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <errno.h>
+#include "utils.h"
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define cpu_to_be32(x) bswap_32(x)
+#define be32_to_cpu(x) bswap_32(x)
+#else
+#define cpu_to_be32(x) (x)
+#define be32_to_cpu(x) (x)
+#endif
+
+#define RTAS_IO_ASSERT -1098 /* Unexpected I/O Error */
+#define RTAS_UNKNOWN_OP -1099 /* No Firmware Implementation of Function */
+#define BLOCK_SIZE 4096
+#define PAGE_SIZE 4096
+#define MAX_PAGES 64
+
+static const char *ofdt_rtas_path = "/proc/device-tree/rtas";
+
+typedef __be32 uint32_t;
+struct rtas_args {
+ __be32 token;
+ __be32 nargs;
+ __be32 nret;
+ __be32 args[16];
+ __be32 *rets; /* Pointer to return values in args[]. */
+};
+
+struct region {
+ uint64_t addr;
+ uint32_t size;
+ struct region *next;
+};
+
+int read_entire_file(int fd, char **buf, size_t *len)
+{
+ size_t buf_size = 0;
+ size_t off = 0;
+ int rc;
+
+ *buf = NULL;
+ do {
+ buf_size += BLOCK_SIZE;
+ if (*buf == NULL)
+ *buf = malloc(buf_size);
+ else
+ *buf = realloc(*buf, buf_size);
+
+ if (*buf == NULL)
+ return -ENOMEM;
+
+ rc = read(fd, *buf + off, BLOCK_SIZE);
+ if (rc < 0)
+ return -EIO;
+
+ off += rc;
+ } while (rc == BLOCK_SIZE);
+
+ if (len)
+ *len = off;
+
+ return 0;
+}
+
+static int open_prop_file(const char *prop_path, const char *prop_name, int *fd)
+{
+ char *path;
+ int len;
+
+ /* allocate enough for two string, a slash and trailing NULL */
+ len = strlen(prop_path) + strlen(prop_name) + 1 + 1;
+ path = malloc(len);
+ if (path == NULL)
+ return -ENOMEM;
+
+ snprintf(path, len, "%s/%s", prop_path, prop_name);
+
+ *fd = open(path, O_RDONLY);
+ free(path);
+ if (*fd < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int get_property(const char *prop_path, const char *prop_name,
+ char **prop_val, size_t *prop_len)
+{
+ int rc, fd;
+
+ rc = open_prop_file(prop_path, prop_name, &fd);
+ if (rc)
+ return rc;
+
+ rc = read_entire_file(fd, prop_val, prop_len);
+ close(fd);
+
+ return rc;
+}
+
+int rtas_token(const char *call_name)
+{
+ char *prop_buf = NULL;
+ size_t len;
+ int rc;
+
+ rc = get_property(ofdt_rtas_path, call_name, &prop_buf, &len);
+ if (rc < 0) {
+ rc = RTAS_UNKNOWN_OP;
+ goto err;
+ }
+
+ rc = be32_to_cpu(*(int *)prop_buf);
+
+err:
+ free(prop_buf);
+ return rc;
+}
+
+static int read_kregion_bounds(struct region *kregion)
+{
+ char *buf;
+ int fd;
+ int rc;
+
+ fd = open("/proc/ppc64/rtas/rmo_buffer", O_RDONLY);
+ if (fd < 0) {
+ printf("Could not open rmo_buffer file\n");
+ return RTAS_IO_ASSERT;
+ }
+
+ rc = read_entire_file(fd, &buf, NULL);
+ close(fd);
+ if (rc) {
+ free(buf);
+ return rc;
+ }
+
+ sscanf(buf, "%" SCNx64 " %x", &kregion->addr, &kregion->size);
+ free(buf);
+
+ if (!(kregion->size && kregion->addr) ||
+ (kregion->size > (PAGE_SIZE * MAX_PAGES))) {
+ printf("Unexpected kregion bounds\n");
+ return RTAS_IO_ASSERT;
+ }
+
+ return 0;
+}
+
+static int rtas_call(const char *name, int nargs,
+ int nrets, ...)
+{
+ struct rtas_args args;
+ __be32 *rets[16];
+ int i, rc, token;
+ va_list ap;
+
+ va_start(ap, nrets);
+
+ token = rtas_token(name);
+ if (token == RTAS_UNKNOWN_OP) {
+ // We don't care if the call doesn't exist
+ printf("call '%s' not available, skipping...", name);
+ rc = RTAS_UNKNOWN_OP;
+ goto err;
+ }
+
+ args.token = cpu_to_be32(token);
+ args.nargs = cpu_to_be32(nargs);
+ args.nret = cpu_to_be32(nrets);
+
+ for (i = 0; i < nargs; i++)
+ args.args[i] = (__be32) va_arg(ap, unsigned long);
+
+ for (i = 0; i < nrets; i++)
+ rets[i] = (__be32 *) va_arg(ap, unsigned long);
+
+ rc = syscall(__NR_rtas, &args);
+ if (rc) {
+ rc = -errno;
+ goto err;
+ }
+
+ if (nrets) {
+ *(rets[0]) = be32_to_cpu(args.args[nargs]);
+
+ for (i = 1; i < nrets; i++) {
+ *(rets[i]) = args.args[nargs + i];
+ }
+ }
+
+err:
+ va_end(ap);
+ return rc;
+}
+
+static int test(void)
+{
+ struct region rmo_region;
+ uint32_t rmo_start;
+ uint32_t rmo_end;
+ __be32 rets[1];
+ int rc;
+
+ // Test a legitimate harmless call
+ // Expected: call succeeds
+ printf("Test a permitted call, no parameters... ");
+ rc = rtas_call("get-time-of-day", 0, 1, rets);
+ printf("rc: %d\n", rc);
+ FAIL_IF(rc != 0 && rc != RTAS_UNKNOWN_OP);
+
+ // Test a prohibited call
+ // Expected: call returns -EINVAL
+ printf("Test a prohibited call... ");
+ rc = rtas_call("nvram-fetch", 0, 1, rets);
+ printf("rc: %d\n", rc);
+ FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP);
+
+ // Get RMO
+ rc = read_kregion_bounds(&rmo_region);
+ if (rc) {
+ printf("Couldn't read RMO region bounds, skipping remaining cases\n");
+ return 0;
+ }
+ rmo_start = rmo_region.addr;
+ rmo_end = rmo_start + rmo_region.size - 1;
+ printf("RMO range: %08x - %08x\n", rmo_start, rmo_end);
+
+ // Test a permitted call, user-supplied size, buffer inside RMO
+ // Expected: call succeeds
+ printf("Test a permitted call, user-supplied size, buffer inside RMO... ");
+ rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_start),
+ cpu_to_be32(rmo_end - rmo_start + 1), rets);
+ printf("rc: %d\n", rc);
+ FAIL_IF(rc != 0 && rc != RTAS_UNKNOWN_OP);
+
+ // Test a permitted call, user-supplied size, buffer start outside RMO
+ // Expected: call returns -EINVAL
+ printf("Test a permitted call, user-supplied size, buffer start outside RMO... ");
+ rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_end + 1),
+ cpu_to_be32(4000), rets);
+ printf("rc: %d\n", rc);
+ FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP);
+
+ // Test a permitted call, user-supplied size, buffer end outside RMO
+ // Expected: call returns -EINVAL
+ printf("Test a permitted call, user-supplied size, buffer end outside RMO... ");
+ rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_start),
+ cpu_to_be32(rmo_end - rmo_start + 2), rets);
+ printf("rc: %d\n", rc);
+ FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP);
+
+ // Test a permitted call, fixed size, buffer end outside RMO
+ // Expected: call returns -EINVAL
+ printf("Test a permitted call, fixed size, buffer end outside RMO... ");
+ rc = rtas_call("ibm,configure-connector", 2, 1, cpu_to_be32(rmo_end - 4000), 0, rets);
+ printf("rc: %d\n", rc);
+ FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP);
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test, "rtas_filter");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-poison.c b/tools/testing/selftests/powerpc/tm/tm-poison.c
index 977558497c16..29e5f26af7b9 100644
--- a/tools/testing/selftests/powerpc/tm/tm-poison.c
+++ b/tools/testing/selftests/powerpc/tm/tm-poison.c
@@ -26,7 +26,7 @@
int tm_poison_test(void)
{
- int pid;
+ int cpu, pid;
cpu_set_t cpuset;
uint64_t poison = 0xdeadbeefc0dec0fe;
uint64_t unknown = 0;
@@ -35,10 +35,13 @@ int tm_poison_test(void)
SKIP_IF(!have_htm());
- /* Attach both Child and Parent to CPU 0 */
+ cpu = pick_online_cpu();
+ FAIL_IF(cpu < 0);
+
+ // Attach both Child and Parent to the same CPU
CPU_ZERO(&cpuset);
- CPU_SET(0, &cpuset);
- sched_setaffinity(0, sizeof(cpuset), &cpuset);
+ CPU_SET(cpu, &cpuset);
+ FAIL_IF(sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0);
pid = fork();
if (!pid) {
diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
index 17becf3dcee4..794d574db784 100644
--- a/tools/testing/selftests/powerpc/tm/tm-tmspr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
@@ -33,19 +33,13 @@
#include "utils.h"
#include "tm.h"
-int num_loops = 10000;
+int num_loops = 1000000;
int passed = 1;
void tfiar_tfhar(void *in)
{
- int i, cpu;
unsigned long tfhar, tfhar_rd, tfiar, tfiar_rd;
- cpu_set_t cpuset;
-
- CPU_ZERO(&cpuset);
- cpu = (unsigned long)in >> 1;
- CPU_SET(cpu, &cpuset);
- sched_setaffinity(0, sizeof(cpuset), &cpuset);
+ int i;
/* TFIAR: Last bit has to be high so userspace can read register */
tfiar = ((unsigned long)in) + 1;
diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c
index 601f0c1d450d..c75960af8018 100644
--- a/tools/testing/selftests/powerpc/tm/tm-trap.c
+++ b/tools/testing/selftests/powerpc/tm/tm-trap.c
@@ -247,8 +247,7 @@ void *pong(void *not_used)
int tm_trap_test(void)
{
uint16_t k = 1;
-
- int rc;
+ int cpu, rc;
pthread_attr_t attr;
cpu_set_t cpuset;
@@ -267,9 +266,12 @@ int tm_trap_test(void)
usr1_sa.sa_sigaction = usr1_signal_handler;
sigaction(SIGUSR1, &usr1_sa, NULL);
- /* Set only CPU 0 in the mask. Both threads will be bound to cpu 0. */
+ cpu = pick_online_cpu();
+ FAIL_IF(cpu < 0);
+
+ // Set only one CPU in the mask. Both threads will be bound to that CPU.
CPU_ZERO(&cpuset);
- CPU_SET(0, &cpuset);
+ CPU_SET(cpu, &cpuset);
/* Init pthread attribute */
rc = pthread_attr_init(&attr);
diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
index 2ca2fccb0a3e..a1348a5f721a 100644
--- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c
+++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
@@ -338,16 +338,19 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
int tm_unavailable_test(void)
{
- int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */
+ int cpu, rc, exception; /* FP = 0, VEC = 1, VSX = 2 */
pthread_t t1;
pthread_attr_t attr;
cpu_set_t cpuset;
SKIP_IF(!have_htm());
- /* Set only CPU 0 in the mask. Both threads will be bound to CPU 0. */
+ cpu = pick_online_cpu();
+ FAIL_IF(cpu < 0);
+
+ // Set only one CPU in the mask. Both threads will be bound to that CPU.
CPU_ZERO(&cpuset);
- CPU_SET(0, &cpuset);
+ CPU_SET(cpu, &cpuset);
/* Init pthread attribute. */
rc = pthread_attr_init(&attr);
diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h
index c402464b038f..c5a1e5c163fc 100644
--- a/tools/testing/selftests/powerpc/tm/tm.h
+++ b/tools/testing/selftests/powerpc/tm/tm.h
@@ -6,9 +6,8 @@
#ifndef _SELFTESTS_POWERPC_TM_TM_H
#define _SELFTESTS_POWERPC_TM_TM_H
-#include <asm/tm.h>
-#include <asm/cputable.h>
#include <stdbool.h>
+#include <asm/tm.h>
#include "utils.h"
diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c
index 18b6a773d5c7..1f36ee1a909a 100644
--- a/tools/testing/selftests/powerpc/utils.c
+++ b/tools/testing/selftests/powerpc/utils.c
@@ -10,7 +10,6 @@
#include <fcntl.h>
#include <link.h>
#include <sched.h>
-#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -273,40 +272,6 @@ int perf_event_reset(int fd)
return 0;
}
-static void sigill_handler(int signr, siginfo_t *info, void *unused)
-{
- static int warned = 0;
- ucontext_t *ctx = (ucontext_t *)unused;
- unsigned long *pc = &UCONTEXT_NIA(ctx);
-
- /* mtspr 3,RS to check for move to DSCR below */
- if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
- if (!warned++)
- printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
- *pc += 4;
- } else {
- printf("SIGILL at %p\n", pc);
- abort();
- }
-}
-
-void set_dscr(unsigned long val)
-{
- static int init = 0;
- struct sigaction sa;
-
- if (!init) {
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = sigill_handler;
- sa.sa_flags = SA_SIGINFO;
- if (sigaction(SIGILL, &sa, NULL))
- perror("sigill_handler");
- init = 1;
- }
-
- asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
-}
-
int using_hash_mmu(bool *using_hash)
{
char line[128];
@@ -318,7 +283,9 @@ int using_hash_mmu(bool *using_hash)
rc = 0;
while (fgets(line, sizeof(line), f) != NULL) {
- if (strcmp(line, "MMU : Hash\n") == 0) {
+ if (!strcmp(line, "MMU : Hash\n") ||
+ !strcmp(line, "platform : Cell\n") ||
+ !strcmp(line, "platform : PowerMac\n")) {
*using_hash = true;
goto out;
}
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale-ftrace.sh
index 7d3c2be66c64..d4bec538086d 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale-ftrace.sh
@@ -1,12 +1,12 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0+
#
-# Analyze a given results directory for rcuperf performance measurements,
+# Analyze a given results directory for rcuscale performance measurements,
# looking for ftrace data. Exits with 0 if data was found, analyzed, and
-# printed. Intended to be invoked from kvm-recheck-rcuperf.sh after
+# printed. Intended to be invoked from kvm-recheck-rcuscale.sh after
# argument checking.
#
-# Usage: kvm-recheck-rcuperf-ftrace.sh resdir
+# Usage: kvm-recheck-rcuscale-ftrace.sh resdir
#
# Copyright (C) IBM Corporation, 2016
#
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh
index db0375a57f28..aa745152a525 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh
@@ -1,9 +1,9 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0+
#
-# Analyze a given results directory for rcuperf performance measurements.
+# Analyze a given results directory for rcuscale scalability measurements.
#
-# Usage: kvm-recheck-rcuperf.sh resdir
+# Usage: kvm-recheck-rcuscale.sh resdir
#
# Copyright (C) IBM Corporation, 2016
#
@@ -20,7 +20,7 @@ fi
PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH
. functions.sh
-if kvm-recheck-rcuperf-ftrace.sh $i
+if kvm-recheck-rcuscale-ftrace.sh $i
then
# ftrace data was successfully analyzed, call it good!
exit 0
@@ -30,12 +30,12 @@ configfile=`echo $i | sed -e 's/^.*\///'`
sed -e 's/^\[[^]]*]//' < $i/console.log |
awk '
-/-perf: .* gps: .* batches:/ {
+/-scale: .* gps: .* batches:/ {
ngps = $9;
nbatches = $11;
}
-/-perf: .*writer-duration/ {
+/-scale: .*writer-duration/ {
gptimes[++n] = $5 / 1000.;
sum += $5 / 1000.;
}
@@ -43,7 +43,7 @@ awk '
END {
newNR = asort(gptimes);
if (newNR <= 0) {
- print "No rcuperf records found???"
+ print "No rcuscale records found???"
exit;
}
pct50 = int(newNR * 50 / 100);
@@ -79,5 +79,5 @@ END {
print "99th percentile grace-period duration: " gptimes[pct99];
print "Maximum grace-period duration: " gptimes[newNR];
print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches;
- print "Computed from rcuperf printk output.";
+ print "Computed from rcuscale printk output.";
}'
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh
new file mode 100755
index 000000000000..671bfee4fcef
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Analyze a given results directory for rcutorture progress.
+#
+# Usage: kvm-recheck-rcu.sh resdir
+#
+# Copyright (C) Facebook, 2020
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+i="$1"
+if test -d "$i" -a -r "$i"
+then
+ :
+else
+ echo Unreadable results directory: $i
+ exit 1
+fi
+. functions.sh
+
+configfile=`echo $i | sed -e 's/^.*\///'`
+nscfs="`grep 'scf_invoked_count ver:' $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* scf_invoked_count ver: //' -e 's/ .*$//' | tr -d '\015'`"
+if test -z "$nscfs"
+then
+ echo "$configfile ------- "
+else
+ dur="`sed -e 's/^.* scftorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`"
+ if test -z "$dur"
+ then
+ rate=""
+ else
+ nscfss=`awk -v nscfs=$nscfs -v dur=$dur '
+ BEGIN { print nscfs / dur }' < /dev/null`
+ rate=" ($nscfss/s)"
+ fi
+ echo "${configfile} ------- ${nscfs} SCF handler invocations$rate"
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index e07779a62634..6dc2b49b85ea 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -66,6 +66,7 @@ config_override_param () {
echo > $T/KcList
config_override_param "$config_dir/CFcommon" KcList "`cat $config_dir/CFcommon 2> /dev/null`"
config_override_param "$config_template" KcList "`cat $config_template 2> /dev/null`"
+config_override_param "--gdb options" KcList "$TORTURE_KCONFIG_GDB_ARG"
config_override_param "--kasan options" KcList "$TORTURE_KCONFIG_KASAN_ARG"
config_override_param "--kcsan options" KcList "$TORTURE_KCONFIG_KCSAN_ARG"
config_override_param "--kconfig argument" KcList "$TORTURE_KCONFIG_ARG"
@@ -152,7 +153,11 @@ qemu_append="`identify_qemu_append "$QEMU"`"
boot_args="`configfrag_boot_params "$boot_args" "$config_template"`"
# Generate kernel-version-specific boot parameters
boot_args="`per_version_boot_params "$boot_args" $resdir/.config $seconds`"
-echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
+if test -n "$TORTURE_BOOT_GDB_ARG"
+then
+ boot_args="$boot_args $TORTURE_BOOT_GDB_ARG"
+fi
+echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" $TORTURE_QEMU_GDB_ARG > $resdir/qemu-cmd
if test -n "$TORTURE_BUILDONLY"
then
@@ -171,14 +176,26 @@ echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
# Attempt to run qemu
( . $T/qemu-cmd; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
commandcompleted=0
-sleep 10 # Give qemu's pid a chance to reach the file
-if test -s "$resdir/qemu_pid"
+if test -z "$TORTURE_KCONFIG_GDB_ARG"
then
- qemu_pid=`cat "$resdir/qemu_pid"`
- echo Monitoring qemu job at pid $qemu_pid
-else
- qemu_pid=""
- echo Monitoring qemu job at yet-as-unknown pid
+ sleep 10 # Give qemu's pid a chance to reach the file
+ if test -s "$resdir/qemu_pid"
+ then
+ qemu_pid=`cat "$resdir/qemu_pid"`
+ echo Monitoring qemu job at pid $qemu_pid
+ else
+ qemu_pid=""
+ echo Monitoring qemu job at yet-as-unknown pid
+ fi
+fi
+if test -n "$TORTURE_KCONFIG_GDB_ARG"
+then
+ echo Waiting for you to attach a debug session, for example: > /dev/tty
+ echo " gdb $base_resdir/vmlinux" > /dev/tty
+ echo 'After symbols load and the "(gdb)" prompt appears:' > /dev/tty
+ echo " target remote :1234" > /dev/tty
+ echo " continue" > /dev/tty
+ kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null`
fi
while :
do
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index e655983b7429..6eb1d3f6524d 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -31,6 +31,9 @@ TORTURE_DEFCONFIG=defconfig
TORTURE_BOOT_IMAGE=""
TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
TORTURE_KCONFIG_ARG=""
+TORTURE_KCONFIG_GDB_ARG=""
+TORTURE_BOOT_GDB_ARG=""
+TORTURE_QEMU_GDB_ARG=""
TORTURE_KCONFIG_KASAN_ARG=""
TORTURE_KCONFIG_KCSAN_ARG=""
TORTURE_KMAKE_ARG=""
@@ -46,6 +49,7 @@ jitter="-1"
usage () {
echo "Usage: $scriptname optional arguments:"
+ echo " --allcpus"
echo " --bootargs kernel-boot-arguments"
echo " --bootimage relative-path-to-kernel-boot-image"
echo " --buildonly"
@@ -55,17 +59,19 @@ usage () {
echo " --defconfig string"
echo " --dryrun sched|script"
echo " --duration minutes"
+ echo " --gdb"
+ echo " --help"
echo " --interactive"
echo " --jitter N [ maxsleep (us) [ maxspin (us) ] ]"
echo " --kconfig Kconfig-options"
echo " --kmake-arg kernel-make-arguments"
echo " --mac nn:nn:nn:nn:nn:nn"
- echo " --memory megabytes | nnnG"
+ echo " --memory megabytes|nnnG"
echo " --no-initrd"
echo " --qemu-args qemu-arguments"
echo " --qemu-cmd qemu-system-..."
echo " --results absolute-pathname"
- echo " --torture rcu"
+ echo " --torture lock|rcu|rcuscale|refscale|scf"
echo " --trust-make"
exit 1
}
@@ -126,6 +132,14 @@ do
dur=$(($2*60))
shift
;;
+ --gdb)
+ TORTURE_KCONFIG_GDB_ARG="CONFIG_DEBUG_INFO=y"; export TORTURE_KCONFIG_GDB_ARG
+ TORTURE_BOOT_GDB_ARG="nokaslr"; export TORTURE_BOOT_GDB_ARG
+ TORTURE_QEMU_GDB_ARG="-s -S"; export TORTURE_QEMU_GDB_ARG
+ ;;
+ --help|-h)
+ usage
+ ;;
--interactive)
TORTURE_QEMU_INTERACTIVE=1; export TORTURE_QEMU_INTERACTIVE
;;
@@ -184,13 +198,13 @@ do
shift
;;
--torture)
- checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\|refscale\)$' '^--'
+ checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuscale\|refscale\|scf\)$' '^--'
TORTURE_SUITE=$2
shift
- if test "$TORTURE_SUITE" = rcuperf || test "$TORTURE_SUITE" = refscale
+ if test "$TORTURE_SUITE" = rcuscale || test "$TORTURE_SUITE" = refscale
then
# If you really want jitter for refscale or
- # rcuperf, specify it after specifying the rcuperf
+ # rcuscale, specify it after specifying the rcuscale
# or the refscale. (But why jitter in these cases?)
jitter=0
fi
@@ -248,6 +262,15 @@ do
done
touch $T/cfgcpu
configs_derep="`echo $configs_derep | sed -e "s/\<CFLIST\>/$defaultconfigs/g"`"
+if test -n "$TORTURE_KCONFIG_GDB_ARG"
+then
+ if test "`echo $configs_derep | wc -w`" -gt 1
+ then
+ echo "The --config list is: $configs_derep."
+ echo "Only one --config permitted with --gdb, terminating."
+ exit 1
+ fi
+fi
for CF1 in $configs_derep
do
if test -f "$CONFIGFRAG/$CF1"
@@ -323,6 +346,9 @@ TORTURE_BUILDONLY="$TORTURE_BUILDONLY"; export TORTURE_BUILDONLY
TORTURE_DEFCONFIG="$TORTURE_DEFCONFIG"; export TORTURE_DEFCONFIG
TORTURE_INITRD="$TORTURE_INITRD"; export TORTURE_INITRD
TORTURE_KCONFIG_ARG="$TORTURE_KCONFIG_ARG"; export TORTURE_KCONFIG_ARG
+TORTURE_KCONFIG_GDB_ARG="$TORTURE_KCONFIG_GDB_ARG"; export TORTURE_KCONFIG_GDB_ARG
+TORTURE_BOOT_GDB_ARG="$TORTURE_BOOT_GDB_ARG"; export TORTURE_BOOT_GDB_ARG
+TORTURE_QEMU_GDB_ARG="$TORTURE_QEMU_GDB_ARG"; export TORTURE_QEMU_GDB_ARG
TORTURE_KCONFIG_KASAN_ARG="$TORTURE_KCONFIG_KASAN_ARG"; export TORTURE_KCONFIG_KASAN_ARG
TORTURE_KCONFIG_KCSAN_ARG="$TORTURE_KCONFIG_KCSAN_ARG"; export TORTURE_KCONFIG_KCSAN_ARG
TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index 71a9f43a3918..e03338091a06 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -33,8 +33,8 @@ then
fi
cat /dev/null > $file.diags
-# Check for proper termination, except for rcuperf and refscale.
-if test "$TORTURE_SUITE" != rcuperf && test "$TORTURE_SUITE" != refscale
+# Check for proper termination, except for rcuscale and refscale.
+if test "$TORTURE_SUITE" != rcuscale && test "$TORTURE_SUITE" != refscale
then
# check for abject failure
@@ -67,6 +67,7 @@ then
grep --binary-files=text 'torture:.*ver:' $file |
egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' |
sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
+ sed -e 's/^.*ver: //' |
awk '
BEGIN {
ver = 0;
@@ -74,13 +75,13 @@ then
}
{
- if (!badseq && ($5 + 0 != $5 || $5 <= ver)) {
+ if (!badseq && ($1 + 0 != $1 || $1 <= ver)) {
badseqno1 = ver;
- badseqno2 = $5;
+ badseqno2 = $1;
badseqnr = NR;
badseq = 1;
}
- ver = $5
+ ver = $1
}
END {
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
index 2dde0d9964e3..4f95f8544f3f 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
@@ -16,5 +16,6 @@ CONFIG_RCU_NOCB_CPU=y
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=y
#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_PROVE_RCU_LIST=y
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon
deleted file mode 100644
index a09816b8c0f3..000000000000
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon
+++ /dev/null
@@ -1,2 +0,0 @@
-CONFIG_RCU_PERF_TEST=y
-CONFIG_PRINTK_TIME=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST b/tools/testing/selftests/rcutorture/configs/rcuscale/CFLIST
index c9f56cf20775..c9f56cf20775 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/CFLIST
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
new file mode 100644
index 000000000000..87caa0e932c7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
@@ -0,0 +1,2 @@
+CONFIG_RCU_SCALE_TEST=y
+CONFIG_PRINTK_TIME=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TINY b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY
index fb05ef5279b4..fb05ef5279b4 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/TINY
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE
index 721cfda76ab2..721cfda76ab2 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54
index 7629f5dd73b2..7629f5dd73b2 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh
index 777d5b0c190f..0333e9b18522 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh
@@ -11,6 +11,6 @@
#
# Adds per-version torture-module parameters to kernels supporting them.
per_version_boot_params () {
- echo $1 rcuperf.shutdown=1 \
- rcuperf.verbose=1
+ echo $1 rcuscale.shutdown=1 \
+ rcuscale.verbose=1
}
diff --git a/tools/testing/selftests/rcutorture/configs/scf/CFLIST b/tools/testing/selftests/rcutorture/configs/scf/CFLIST
new file mode 100644
index 000000000000..4d62eb4a39f9
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/scf/CFLIST
@@ -0,0 +1,2 @@
+NOPREEMPT
+PREEMPT
diff --git a/tools/testing/selftests/rcutorture/configs/scf/CFcommon b/tools/testing/selftests/rcutorture/configs/scf/CFcommon
new file mode 100644
index 000000000000..c11ab91f49f5
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/scf/CFcommon
@@ -0,0 +1,2 @@
+CONFIG_SCF_TORTURE_TEST=y
+CONFIG_PRINTK_TIME=y
diff --git a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT
new file mode 100644
index 000000000000..b8429d6c6ebc
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT
@@ -0,0 +1,9 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=n
+CONFIG_NO_HZ_FULL=y
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
diff --git a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT.boot b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT.boot
new file mode 100644
index 000000000000..d6a7fa097c2e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT.boot
@@ -0,0 +1 @@
+nohz_full=1
diff --git a/tools/testing/selftests/rcutorture/configs/scf/PREEMPT b/tools/testing/selftests/rcutorture/configs/scf/PREEMPT
new file mode 100644
index 000000000000..ae4992b141b0
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/scf/PREEMPT
@@ -0,0 +1,9 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
diff --git a/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh
new file mode 100644
index 000000000000..d3d9e35d3d55
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Torture-suite-dependent shell functions for the rest of the scripts.
+#
+# Copyright (C) Facebook, 2020
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+# scftorture_param_onoff bootparam-string config-file
+#
+# Adds onoff scftorture module parameters to kernels having it.
+scftorture_param_onoff () {
+ if ! bootparam_hotplug_cpu "$1" && configfrag_hotplug_cpu "$2"
+ then
+ echo CPU-hotplug kernel, adding scftorture onoff. 1>&2
+ echo scftorture.onoff_interval=1000 scftorture.onoff_holdoff=30
+ fi
+}
+
+# per_version_boot_params bootparam-string config-file seconds
+#
+# Adds per-version torture-module parameters to kernels supporting them.
+per_version_boot_params () {
+ echo $1 `scftorture_param_onoff "$1" "$2"` \
+ scftorture.stat_interval=15 \
+ scftorture.shutdown_secs=$3 \
+ scftorture.verbose=1 \
+ scf
+}
diff --git a/tools/testing/selftests/rcutorture/doc/initrd.txt b/tools/testing/selftests/rcutorture/doc/initrd.txt
index 933b4fd12327..41a4255865d4 100644
--- a/tools/testing/selftests/rcutorture/doc/initrd.txt
+++ b/tools/testing/selftests/rcutorture/doc/initrd.txt
@@ -1,12 +1,11 @@
-The rcutorture scripting tools automatically create the needed initrd
-directory using dracut. Failing that, this tool will create an initrd
-containing a single statically linked binary named "init" that loops
-over a very long sleep() call. In both cases, this creation is done
-by tools/testing/selftests/rcutorture/bin/mkinitrd.sh.
+The rcutorture scripting tools automatically create an initrd containing
+a single statically linked binary named "init" that loops over a
+very long sleep() call. In both cases, this creation is done by
+tools/testing/selftests/rcutorture/bin/mkinitrd.sh.
-However, if you are attempting to run rcutorture on a system that does
-not have dracut installed, and if you don't like the notion of static
-linking, you might wish to press an existing initrd into service:
+However, if you don't like the notion of statically linked bare-bones
+userspace environments, you might wish to press an existing initrd
+into service:
------------------------------------------------------------------------
cd tools/testing/selftests/rcutorture
@@ -15,24 +14,3 @@ mkdir initrd
cd initrd
cpio -id < /tmp/initrd.img.zcat
# Manually verify that initrd contains needed binaries and libraries.
-------------------------------------------------------------------------
-
-Interestingly enough, if you are running rcutorture, you don't really
-need userspace in many cases. Running without userspace has the
-advantage of allowing you to test your kernel independently of the
-distro in place, the root-filesystem layout, and so on. To make this
-happen, put the following script in the initrd's tree's "/init" file,
-with 0755 mode.
-
-------------------------------------------------------------------------
-#!/bin/sh
-
-while :
-do
- sleep 10
-done
-------------------------------------------------------------------------
-
-This approach also allows most of the binaries and libraries in the
-initrd filesystem to be dispensed with, which can save significant
-space in rcutorture's "res" directory.
diff --git a/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt
index 449cf579d6f9..b2fc247976b1 100644
--- a/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt
+++ b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt
@@ -1,8 +1,33 @@
-This document describes one way to create the rcu-test-image file
-that contains the filesystem used by the guest-OS kernel. There are
-probably much better ways of doing this, and this filesystem could no
-doubt be smaller. It is probably also possible to simply download
-an appropriate image from any number of places.
+Normally, a minimal initrd is created automatically by the rcutorture
+scripting. But minimal really does mean "minimal", namely just a single
+root directory with a single statically linked executable named "init":
+
+$ size tools/testing/selftests/rcutorture/initrd/init
+ text data bss dec hex filename
+ 328 0 8 336 150 tools/testing/selftests/rcutorture/initrd/init
+
+Suppose you need to run some scripts, perhaps to monitor or control
+some aspect of the rcutorture testing. This will require a more fully
+filled-out userspace, perhaps containing libraries, executables for
+the shell and other utilities, and soforth. In that case, place your
+desired filesystem here:
+
+ tools/testing/selftests/rcutorture/initrd
+
+For example, your tools/testing/selftests/rcutorture/initrd/init might
+be a script that does any needed mount operations and starts whatever
+scripts need starting to properly monitor or control your testing.
+The next rcutorture build will then incorporate this filesystem into
+the kernel image that is passed to qemu.
+
+Or maybe you need a real root filesystem for some reason, in which case
+please read on!
+
+The remainder of this document describes one way to create the
+rcu-test-image file that contains the filesystem used by the guest-OS
+kernel. There are probably much better ways of doing this, and this
+filesystem could no doubt be smaller. It is probably also possible to
+simply download an appropriate image from any number of places.
That said, here are the commands:
@@ -36,7 +61,7 @@ References:
https://help.ubuntu.com/community/JeOSVMBuilder
http://wiki.libvirt.org/page/UbuntuKVMWalkthrough
http://www.moe.co.uk/2011/01/07/pci_add_option_rom-failed-to-find-romfile-pxe-rtl8139-bin/ -- "apt-get install kvm-pxe"
- http://www.landley.net/writing/rootfs-howto.html
- http://en.wikipedia.org/wiki/Initrd
- http://en.wikipedia.org/wiki/Cpio
+ https://www.landley.net/writing/rootfs-howto.html
+ https://en.wikipedia.org/wiki/Initrd
+ https://en.wikipedia.org/wiki/Cpio
http://wiki.libvirt.org/page/UbuntuKVMWalkthrough
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 0a28a6a29581..c9404ef9698e 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -45,7 +45,7 @@ struct hmm_buffer {
#define TWOMEG (1 << 21)
#define HMM_BUFFER_SIZE (1024 << 12)
#define HMM_PATH_MAX 64
-#define NTIMES 256
+#define NTIMES 10
#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))